hpm_math.h 695 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193
  1. /*
  2. * Copyright (c) 2022,2024 HPMicro
  3. *
  4. * SPDX-License-Identifier: BSD-3-Clause
  5. *
  6. */
  7. #ifndef __HPM_MATH_H__
  8. #define __HPM_MATH_H__
  9. #include <stddef.h>
  10. /**
  11. * @defgroup hpmmath HPMicro Math Functions
  12. * @ingroup middleware_interfaces
  13. */
  14. #define HPM_DSP_HW_NDS32 1 /* andes hardware dsp */
  15. #ifdef CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
  16. #include CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
  17. #else
  18. /* Enable Compute Cell Library*/
  19. /* #define HPM_EN_MATH_FFA_LIB */
  20. /* #define HPM_EN_MATH_DSP_LIB */
  21. /* #define HPM_EN_MATH_NN_LIB */
  22. #define HPM_MATH_DSP_STATISTICS 1
  23. #define HPM_MATH_DSP_BASIC 1
  24. #define HPM_MATH_DSP_COMPLEX 1
  25. #define HPM_MATH_DSP_CONTROLLER 1
  26. #define HPM_MATH_DSP_DISTANCE 1
  27. #define HPM_MATH_DSP_FILTERING 1
  28. #define HPM_MATH_DSP_MATRIX 1
  29. #define HPM_MATH_DSP_SVM 1
  30. #define HPM_MATH_DSP_TRANSFORM 1
  31. #define HPM_MATH_DSP_UTILS 1
  32. #define HPM_MATH_DSP_SORT 1
  33. #define HPM_MATH_NN_ACTIVATION 1
  34. #define HPM_MATH_NN_TINYENGINE 1
  35. #define HPM_MATH_NN_BASIC 1
  36. #define HPM_MATH_NN_CONCATENATION 1
  37. #define HPM_MATH_NN_CONVOLUTION 1
  38. #define HPM_MATH_NN_CONNECTED 1
  39. #define HPM_MATH_NN_POOLING 1
  40. #define HPM_MATH_NN_SOFTMAX 1
  41. #define HPM_MATH_NN_UTIL 1
  42. #define HPM_DSP_CORE HPM_DSP_HW_NDS32 /* DSP core selection */
  43. #define HPM_MATH_PI (3.14159265358979323846)
  44. /**
  45. * @brief HPM_MATH_SW_FFT_CHECKLIST Enabled to use table lookup to speed up the software fft,
  46. * but will increase the code space,and only support sampling points 2^( 2-10).
  47. *
  48. * With this option turned off,
  49. * the software fft can support as many sample points as necessary with sufficient space
  50. *
  51. */
  52. #define HPM_MATH_SW_FFT_CHECKLIST
  53. #endif
  54. #ifdef __cplusplus
  55. extern "C"
  56. {
  57. #endif
  58. #ifdef HPM_MATH_DSP_STATISTICS
  59. /**
  60. * @defgroup statistics DSP Statistics Functions
  61. * @ingroup hpmmath
  62. * @{
  63. */
  64. #ifdef HPM_EN_MATH_DSP_LIB
  65. #ifdef __zcc__
  66. #include "tpt_math.h"
  67. #endif
  68. #include "riscv_dsp_statistics_math.h"
  69. // Maximum
  70. /**
  71. * @brief Maximum value of the floating-potint vector.
  72. * @param[in] *src points to the input vector.
  73. * @param[in] size size of the vectors.
  74. * @param[out] *index index of the maximum value.
  75. * @return maximum value.
  76. */
  77. static inline float32_t hpm_dsp_max_f32(const float32_t *src, uint32_t size, uint32_t *index)
  78. {
  79. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  80. #ifdef __zcc__
  81. f32_t res;
  82. tpt_max_f32(&res, index, src, size);
  83. return res;
  84. #else
  85. return riscv_dsp_max_f32(src, size, index);
  86. #endif
  87. #endif
  88. }
  89. static inline float32_t hpm_dsp_max_val_f32(const float32_t *src, uint32_t size)
  90. {
  91. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  92. return riscv_dsp_max_val_f32(src, size);
  93. #endif
  94. }
  95. /**
  96. * @brief Maximum value of the q15 vector.
  97. * @param[in] *src points to the input vector.
  98. * @param[in] size size of the vectors.
  99. * @param[out] *index index of the maximum value.
  100. * @return maximum value.
  101. */
  102. static inline q15_t hpm_dsp_max_q15(const q15_t *src, uint32_t size, uint32_t *index)
  103. {
  104. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  105. #ifdef __zcc__
  106. q15_t res;
  107. tpt_max_q15(&res, index, src, size);
  108. return res;
  109. #else
  110. return riscv_dsp_max_q15(src, size, index);
  111. #endif
  112. #endif
  113. }
  114. /**
  115. * @brief Maximum value of the q31 vector.
  116. * @param[in] *src points to the input vector.
  117. * @param[in] size size of the vectors.
  118. * @param[out] *index index of the maximum value.
  119. * @return maximum value.
  120. */
  121. static inline q31_t hpm_dsp_max_q31(const q31_t *src, uint32_t size, uint32_t *index)
  122. {
  123. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  124. #ifdef __zcc__
  125. q31_t res;
  126. tpt_max_q31(&res, index, src, size);
  127. return res;
  128. #else
  129. return riscv_dsp_max_q31(src, size, index);
  130. #endif
  131. #endif
  132. }
  133. /**
  134. * @brief Maximum value of the q7 vector.
  135. * @param[in] *src points to the input vector.
  136. * @param[in] size size of the vectors.
  137. * @param[out] *index index of the maximum value.
  138. * @return maximum value.
  139. */
  140. static inline q7_t hpm_dsp_max_q7(const q7_t *src, uint32_t size, uint32_t *index)
  141. {
  142. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  143. #ifdef __zcc__
  144. q7_t res;
  145. tpt_max_q7(&res, index, src, size);
  146. return res;
  147. #else
  148. return riscv_dsp_max_q7(src, size, index);
  149. #endif
  150. #endif
  151. }
  152. /**
  153. * @brief Max value of the u8 vector.
  154. * @param[in] *src points to the input vector.
  155. * @param[in] size size of the vectors.
  156. * @param[out] *index index of the maximum value.
  157. * @return max value.
  158. */
  159. static inline uint8_t hpm_dsp_max_u8(const uint8_t *src, uint32_t size, uint32_t *index)
  160. {
  161. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  162. return riscv_dsp_max_u8(src, size, index);
  163. #endif
  164. }
  165. // Minimum
  166. /**
  167. * @brief Minimum value of the floating-potint vector.
  168. * @param[in] *src points to the input vector.
  169. * @param[in] size size of the vectors.
  170. * @param[out] *index index of the minimum value.
  171. * @return minimum value.
  172. */
  173. static inline float32_t hpm_dsp_min_f32(const float32_t *src, uint32_t size, uint32_t *index)
  174. {
  175. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  176. #ifdef __zcc__
  177. f32_t res;
  178. tpt_min_f32(&res, index, src, size);
  179. return res;
  180. #else
  181. return riscv_dsp_min_f32(src, size, index);
  182. #endif
  183. #endif
  184. }
  185. /**
  186. * @brief Minimum value of the q15 vector.
  187. * @param[in] *src points to the input vector.
  188. * @param[in] size size of the vectors.
  189. * @param[out] *index index of the minimum value.
  190. * @return minimum value.
  191. */
  192. static inline q15_t hpm_dsp_min_q15(const q15_t *src, uint32_t size, uint32_t *index)
  193. {
  194. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  195. #ifdef __zcc__
  196. q15_t res;
  197. tpt_min_q15(&res, index, src, size);
  198. return res;
  199. #else
  200. return riscv_dsp_min_q15(src, size, index);
  201. #endif
  202. #endif
  203. }
  204. /**
  205. * @brief Minimum value of the q31 vector.
  206. * @param[in] *src points to the input vector.
  207. * @param[in] size size of the vectors.
  208. * @param[out] *index index of the minimum value.
  209. * @return minimum value.
  210. */
  211. static inline q31_t hpm_dsp_min_q31(const q31_t *src, uint32_t size, uint32_t *index)
  212. {
  213. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  214. #ifdef __zcc__
  215. q31_t res;
  216. tpt_min_q31(&res, index, src, size);
  217. return res;
  218. #else
  219. return riscv_dsp_min_q31(src, size, index);
  220. #endif
  221. #endif
  222. }
  223. /**
  224. * @brief Minimum value of the q7 vector.
  225. * @param[in] *src points to the input vector.
  226. * @param[in] size size of the vectors.
  227. * @param[out] *index index of the minimum value.
  228. * @return minimum value.
  229. */
  230. static inline q7_t hpm_dsp_min_q7(const q7_t *src, uint32_t size, uint32_t *index)
  231. {
  232. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  233. #ifdef __zcc__
  234. q7_t res;
  235. tpt_min_q7(&res, index, src, size);
  236. return res;
  237. #else
  238. return riscv_dsp_min_q7(src, size, index);
  239. #endif
  240. #endif
  241. }
  242. /**
  243. * @brief Minimum value of the u8 vector.
  244. * @param[in] *src points to the input vector.
  245. * @param[in] size size of the vectors.
  246. * @param[out] *index index of the minimum value.
  247. * @return minimum value.
  248. */
  249. static inline uint8_t hpm_dsp_min_u8(const uint8_t *src, uint32_t size, uint32_t *index)
  250. {
  251. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  252. return riscv_dsp_min_u8(src, size, index);
  253. #endif
  254. }
  255. // Mean
  256. /**
  257. * @brief Mean value of the floating-potint vector.
  258. * @param[in] *src points to the input vector.
  259. * @param[in] size size of the vectors.
  260. * @return mean value.
  261. */
  262. static inline float32_t hpm_dsp_mean_f32(const float32_t *src, uint32_t size)
  263. {
  264. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  265. #ifdef __zcc__
  266. f32_t res;
  267. tpt_mean_f32(&res, src, size);
  268. return res;
  269. #else
  270. return riscv_dsp_mean_f32(src, size);
  271. #endif
  272. #endif
  273. }
  274. /**
  275. * @brief Mean value of the q15 vector.
  276. * @param[in] *src points to the input vector.
  277. * @param[in] size size of the vectors.
  278. * @return mean value.
  279. *
  280. * <b>Function notes:</b>
  281. *
  282. * The 1.15 format input is accumulated in a 32-bit accumulator in 17.15
  283. * format and then truncated to yield a result of 1.15 format.
  284. */
  285. static inline q15_t hpm_dsp_mean_q15(const q15_t *src, uint32_t size)
  286. {
  287. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  288. #ifdef __zcc__
  289. q15_t res;
  290. tpt_mean_q15(&res, src, size);
  291. return res;
  292. #else
  293. return riscv_dsp_mean_q15(src, size);
  294. #endif
  295. #endif
  296. }
  297. /**
  298. * @brief Mean value of the q31 vector.
  299. * @param[in] *src points to the input vector.
  300. * @param[in] size size of the vectors.
  301. * @return mean value.
  302. *
  303. * <b>Function notes:</b>
  304. *
  305. * The 1.31 format input is accumulated in a 64-bit accumulator in 33.31
  306. * format and then truncated to yield a result of 1.31 format.
  307. */
  308. static inline q31_t hpm_dsp_mean_q31(const q31_t *src, uint32_t size)
  309. {
  310. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  311. #ifdef __zcc__
  312. q31_t res;
  313. tpt_mean_q31(&res, src, size);
  314. return res;
  315. #else
  316. return riscv_dsp_mean_q31(src, size);
  317. #endif
  318. #endif
  319. }
  320. /**
  321. * @brief Mean value of the q7 vector.
  322. * @param[in] *src points to the input vector.
  323. * @param[in] size size of the vectors.
  324. * @return mean value.
  325. *
  326. * <b>Function notes:</b>
  327. *
  328. * The 1.7 format input is accumulated in a 32-bit accumulator in 25.7
  329. * format and then truncated to yield a result of 1.7 format.
  330. */
  331. static inline q7_t hpm_dsp_mean_q7(const q7_t *src, uint32_t size)
  332. {
  333. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  334. #ifdef __zcc__
  335. q7_t res;
  336. tpt_mean_q7(&res, src, size);
  337. return res;
  338. #else
  339. return riscv_dsp_mean_q7(src, size);
  340. #endif
  341. #endif
  342. }
  343. /**
  344. * @brief Mean value of the u8 vector.
  345. * @param[in] *src points to the input vector.
  346. * @param[in] size size of the vectors.
  347. * @return mean value.
  348. *
  349. * The 8-bit format input is accumulated in a 32-bit accumulator
  350. * and then truncated to yield a result of 8-bit format.
  351. */
  352. static inline uint8_t hpm_dsp_mean_u8(const uint8_t *src, uint32_t size)
  353. {
  354. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  355. return riscv_dsp_mean_u8(src, size);
  356. #endif
  357. }
// Sum of the Squares
  359. /**
  360. * @brief Sum of the squares of the floating-potint vector.
  361. * @param[in] *src points to the input vector.
  362. * @param[in] size size of the vectors.
  363. * @return Sum of the squares value.
  364. */
  365. static inline float32_t hpm_dsp_pwr_f32(const float32_t *src, uint32_t size)
  366. {
  367. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  368. #ifdef __zcc__
  369. f32_t res;
  370. tpt_power_f32(&res, src, size);
  371. return res;
  372. #else
  373. return riscv_dsp_pwr_f32(src, size);
  374. #endif
  375. #endif
  376. }
  377. /**
  378. * @brief Sum of the squares of the q15 vector.
  379. * @param[in] *src points to the input vector.
  380. * @param[in] size size of the vectors.
  381. * @return Sum of the squares value.
  382. *
  383. * <b>Function notes:</b>
  384. *
  385. * The 1.15 format input is multiplied yields a 2.30 format, and then added
  386. * without saturation to a 64-bit accumulator in 34.30 format. Finally,
  387. * the return result is in 34.30 format.
  388. */
  389. static inline q63_t hpm_dsp_pwr_q15(const q15_t *src, uint32_t size)
  390. {
  391. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  392. #ifdef __zcc__
  393. q63_t res;
  394. tpt_power_q15(&res, src, size);
  395. return res;
  396. #else
  397. return riscv_dsp_pwr_q15(src, size);
  398. #endif
  399. #endif
  400. }
  401. /**
  402. * @brief Sum of the squares of the q31 vector.
  403. * @param[in] *src points to the input vector.
  404. * @param[in] size size of the vectors.
  405. * @return Sum of the squares value.
  406. *
  407. * <b>Function notes:</b>
  408. *
  409. * The 1.31 format input is multiplied yields a 2.62 format and this result
  410. * is truncated to 2.48 format by discarding the lower 14 bits. The 2.48
  411. * result is then added without saturation to a 64-bit accumulator in 16.48
  412. * format. Finally, the return result is in 16.48 format.
  413. */
  414. static inline q63_t hpm_dsp_pwr_q31(const q31_t *src, uint32_t size)
  415. {
  416. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  417. #ifdef __zcc__
  418. q63_t res;
  419. tpt_power_q31(&res, src, size);
  420. return res;
  421. #else
  422. return riscv_dsp_pwr_q31(src, size);
  423. #endif
  424. #endif
  425. }
  426. /**
  427. * @brief Sum of the squares of the q7 vector.
  428. * @param[in] *src points to the input vector.
  429. * @param[in] size size of the vectors.
  430. * @return Sum of the squares value.
  431. *
  432. * <b>Function notes:</b>
  433. *
  434. * The 1.7 format input is multiplied yields a 2.14 format, and then added
  435. * without saturation to a 32-bit accumulator in 18.14 format. Finally,
  436. * the return result is in 18.14 format.
  437. */
  438. static inline q31_t hpm_dsp_pwr_q7(const q7_t *src, uint32_t size)
  439. {
  440. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  441. #ifdef __zcc__
  442. q31_t res;
  443. tpt_power_q7(&res, src, size);
  444. return res;
  445. #else
  446. return riscv_dsp_pwr_q7(src, size);
  447. #endif
  448. #endif
  449. }
  450. // Root Mean Square
  451. /**
  452. * @brief RMS of the floating-potint vector.
  453. * @param[in] *src points to the input vector.
  454. * @param[in] size size of the vectors.
  455. * @return RMS value.
  456. */
  457. static inline float32_t hpm_dsp_rms_f32(const float32_t *src, uint32_t size)
  458. {
  459. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  460. #ifdef __zcc__
  461. f32_t res;
  462. tpt_rms_f32(&res, src, size);
  463. return res;
  464. #else
  465. return riscv_dsp_rms_f32(src, size);
  466. #endif
  467. #endif
  468. }
  469. /**
  470. * @brief RMS of the q15 vector.
  471. * @param[in] *src points to the input vector.
  472. * @param[in] size size of the vectors.
  473. * @return RMS value.
  474. *
  475. * <b>Function notes:</b>
  476. *
  477. * The 1.15 format input is multiplied yields a 2.30 format, and then added
  478. * without saturation to a 64-bit accumulator in 34.30 format. Finally,
  479. * the added output is truncated to 34.15 format by discarding the lower 15
  480. * bits, and then saturated to yield a result in 1.15 format.
  481. */
  482. static inline q15_t hpm_dsp_rms_q15(const q15_t *src, uint32_t size)
  483. {
  484. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  485. #ifdef __zcc__
  486. q15_t res;
  487. tpt_rms_q15(&res, src, size);
  488. return res;
  489. #else
  490. return riscv_dsp_rms_q15(src, size);
  491. #endif
  492. #endif
  493. }
  494. /**
  495. * @brief RMS of the q31 vector.
  496. * @param[in] *src points to the input vector.
  497. * @param[in] size size of the vectors.
  498. * @return RMS value.
  499. *
  500. * <b>Function notes:</b>
  501. *
  502. * The 1.31 format input is multiplied yields a 2.62 format. In order to
  503. * avoid overflows, the input signal must be scaled down by
  504. * <code>log2(size)</code> bits, Finally, the 2.62 accumulator is right
  505. * shifted by 31 bits to yield a 1.31 format value.
  506. */
  507. static inline q31_t hpm_dsp_rms_q31(const q31_t *src, uint32_t size)
  508. {
  509. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  510. #ifdef __zcc__
  511. q31_t res;
  512. tpt_rms_q31(&res, src, size);
  513. return res;
  514. #else
  515. return riscv_dsp_rms_q31(src, size);
  516. #endif
  517. #endif
  518. }
  519. // Standard deviation
  520. /**
  521. * @brief Standard deviation of the floating-potint vector.
  522. * @param[in] *src points to the input vector.
  523. * @param[in] size size of the vectors.
  524. * @return Standard deviation value.
  525. */
  526. static inline float32_t hpm_dsp_std_f32(const float32_t *src, uint32_t size)
  527. {
  528. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  529. #ifdef __zcc__
  530. f32_t res;
  531. tpt_std_f32(&res, src, size);
  532. return res;
  533. #else
  534. return riscv_dsp_std_f32(src, size);
  535. #endif
  536. #endif
  537. }
  538. /**
  539. * @brief Standard deviation of the q15 vector.
  540. * @param[in] *src points to the input vector.
  541. * @param[in] size size of the vectors.
  542. * @return Standard deviation value.
  543. *
  544. * <b>Function notes:</b>
  545. *
  546. * The 1.15 format input is multiplied yields a 2.30 format, and then added
  547. * without saturation to a 64-bit accumulator in 34.30 format. Finally,
  548. * the added output is truncated to 34.15 format by discarding the lower 15
  549. * bits, and then saturated to yield a result in 1.15 format.
  550. */
  551. static inline q15_t hpm_dsp_std_q15(const q15_t *src, uint32_t size)
  552. {
  553. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  554. #ifdef __zcc__
  555. q15_t res;
  556. tpt_std_q15(&res, src, size);
  557. return res;
  558. #else
  559. return riscv_dsp_std_q15(src, size);
  560. #endif
  561. #endif
  562. }
  563. /**
  564. * @brief Standard deviation of the q31 vector.
  565. * @param[in] *src points to the input vector.
  566. * @param[in] size size of the vectors.
  567. * @return Standard deviation value.
  568. *
  569. * <b>Function notes:</b>
  570. *
  571. * The 1.31 format input is multiplied yields a 2.62 format. In order to
  572. * avoid overflows, the input signal must be scaled down by
  573. * <code>log2(size)</code> bits, Finally, the 2.62 accumulator is right
  574. * shifted by 31 bits to yield a 1.31 format value.
  575. */
  576. static inline q31_t hpm_dsp_std_q31(const q31_t *src, uint32_t size)
  577. {
  578. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  579. #ifdef __zcc__
  580. q31_t res;
  581. tpt_std_q31(&res, src, size);
  582. return res;
  583. #else
  584. return riscv_dsp_std_q31(src, size);
  585. #endif
  586. #endif
  587. }
  588. /**
  589. * @brief Standard deviation of the u8 vector.
  590. * @param[in] *src points to the input vector.
  591. * @param[in] size size of the vectors.
  592. * @return Standard deviation value.
  593. *
  594. * <b>Function notes:</b>
  595. * The 8-bit format input is multiplied yields a 16-bit format, and then added
  596. * saturation to a 32-bit accumulator in 16.16 format. Finally,
  597. * the added output is truncated to 34.15 format by discarding the lower 1
  598. * bits, and then saturated to yield a result in 1.15 format.
  599. */
  600. static inline q15_t hpm_dsp_std_u8(const uint8_t *src, uint32_t size)
  601. {
  602. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  603. return riscv_dsp_std_u8(src, size);
  604. #endif
  605. }
  606. // Variance
  607. /**
  608. * @brief Variance of the floating-potint vector.
  609. * @param[in] *src points to the input vector.
  610. * @param[in] size size of the vectors.
  611. * @return Variance value.
  612. */
  613. static inline float32_t hpm_dsp_var_f32(const float32_t *src, uint32_t size)
  614. {
  615. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  616. #ifdef __zcc__
  617. f32_t res;
  618. tpt_var_f32(&res, src, size);
  619. return res;
  620. #else
  621. return riscv_dsp_var_f32(src, size);
  622. #endif
  623. #endif
  624. }
  625. /**
  626. * @brief Variance of the q15 vector.
  627. * @param[in] *src points to the input vector.
  628. * @param[in] size size of the vectors.
  629. * @return Variance value.
  630. *
  631. * <b>Function notes:</b>
  632. *
  633. * The 1.15 format input is multiplied yields a 2.30 format, and then added
  634. * without saturation to a 64-bit accumulator in 34.30 format. Finally,
  635. * the added output is truncated to 34.15 format by discarding the lower 15
  636. * bits, and then saturated to yield a result in 1.15 format.
  637. */
  638. static inline q31_t hpm_dsp_var_q15(const q15_t *src, uint32_t size)
  639. {
  640. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  641. #ifdef __zcc__
  642. q15_t res;
  643. tpt_var_q15(&res, src, size);
  644. return res;
  645. #else
  646. return riscv_dsp_var_q15(src, size);
  647. #endif
  648. #endif
  649. }
  650. /**
  651. * @brief Variance of the q31 vector.
  652. * @param[in] *src points to the input vector.
  653. * @param[in] size size of the vectors.
  654. * @return Variance value.
  655. *
  656. * <b>Function notes:</b>
  657. *
  658. * The 1.31 format input is multiplied yields a 2.62 format. In order to
  659. * avoid overflows, the input signal must be scaled down by
  660. * <code>log2(size)</code> bits, Finally, the 2.62 accumulator is right
  661. * shifted by 31 bits to yield a 1.31 format value.
  662. */
  663. static inline q63_t hpm_dsp_var_q31(const q31_t *src, uint32_t size)
  664. {
  665. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  666. #ifdef __zcc__
  667. q31_t res;
  668. tpt_var_q31(&res, src, size);
  669. return res;
  670. #else
  671. return riscv_dsp_var_q31(src, size);
  672. #endif
  673. #endif
  674. }
  675. /**
  676. * @brief Entropy of the floating-potint vector.
  677. * @param[in] *src points to the input vector.
  678. * @param[in] size size of the vectors.
  679. * @return Entropy value.
  680. *
  681. * E = -sum (P .* log2 (P))
  682. */
  683. static inline float32_t hpm_dsp_entropy_f32(const float32_t *src, uint32_t size)
  684. {
  685. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  686. #ifdef __zcc__
  687. return tpt_entropy_f32(src, size);
  688. #else
  689. return riscv_dsp_entropy_f32(src, size);
  690. #endif
  691. #endif
  692. }
  693. /**
  694. * @brief Relative Entropy of the floating-potint vector.
  695. * @param[in] *src1 points to the first input vector.
  696. * @param[in] *src2 points to the second input vector.
  697. * @param[in] size size of the vectors.
  698. * @return Relative Entropy value.
  699. *
  700. * Relative Entropy also called KullbackLeibler divergence:
  701. * D(A || B) = A * ln(A / B);
  702. *
  703. */
  704. static inline float32_t hpm_dsp_relative_entropy_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  705. {
  706. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  707. #ifdef __zcc__
  708. return tpt_relative_entropy_f32(src1, src2, size);
  709. #else
  710. return riscv_dsp_relative_entropy_f32(src1, src2, size);
  711. #endif
  712. #endif
  713. }
  714. /**
  715. * @brief Log-Sum-Exp of the floating-potint vector.
  716. * @param[in] *src points to the input vector.
  717. * @param[in] size size of the vectors.
  718. * @return lse value.
  719. *
  720. */
  721. static inline float32_t hpm_dsp_lse_f32(const float32_t *src, uint32_t size)
  722. {
  723. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  724. #ifdef __zcc__
  725. tpt_lse_f32(src, size);
  726. #else
  727. return riscv_dsp_lse_f32(src, size);
  728. #endif
  729. #endif
  730. }
  731. /**
  732. * @brief Dot product with Log-Sum-Exp of the floating-potint vector.
  733. * @param[in] *src1 points to the first input vector.
  734. * @param[in] *src2 points to the second input vector.
  735. * @param[in] size size of the vectors.
  736. * @param[in] *buffer points to temporary buffer.
  737. * @return the Log-Sum-Exp of dot product value.
  738. *
  739. */
  740. static inline float32_t hpm_dsp_lse_dprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *buffer)
  741. {
  742. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  743. #ifdef __zcc__
  744. return tpt_lse_dprod_f32(src1, src2, size, buffer);
  745. #else
  746. return riscv_dsp_lse_dprod_f32(src1, src2, size, buffer);
  747. #endif
  748. #endif
  749. }
  750. /**
  751. * @brief Naive Gaussian Bayesian Estimator
  752. *
  753. * @param[in] *instance points to a naive bayes instance
  754. * @param[in] *src points to the elements of the input vector.
  755. * @param[in] *buf points to a buffer of length numofclass /numberOfClasses
  756. * @return The predicted class
  757. *
  758. */
  759. static inline uint32_t hpm_dsp_gaussian_naive_bayes_est_f32(const riscv_dsp_gaussian_naivebayes_f32_t *instance, const float32_t * src, float32_t *buf)
  760. {
  761. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  762. return riscv_dsp_gaussian_naive_bayes_est_f32(instance, src, buf);
  763. #endif
  764. }
  765. /**
  766. * @brief Maximum absolute value of the floating-potint vector.
  767. * @param[in] src pointer of the input vector
  768. * @param[in] size number of elements in a vector
  769. * @param[out] index index of the maximum value
  770. * @return Maximum value
  771. */
  772. static inline float32_t hpm_dsp_absmax_f32(const float32_t* src, uint32_t size, uint32_t* index)
  773. {
  774. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  775. return riscv_dsp_absmax_f32(src, size, index);
  776. #endif
  777. }
  778. /**
  779. * @brief Maximum absolute value of the q15 vector.
  780. * @param[in] src pointer of the input vector
  781. * @param[in] size number of elements in a vector
  782. * @param[out] index index of the maximum absolute value
  783. * @return Maximum absolute value
  784. */
  785. static inline q15_t hpm_dsp_absmax_q15(const q15_t* src, uint32_t size, uint32_t* index)
  786. {
  787. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  788. return riscv_dsp_absmax_q15(src, size, index);
  789. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  790. }
  791. /**
  792. * @brief Maximum absolute value of the q31 vector.
  793. * @param[in] src pointer of the input vector
  794. * @param[in] size number of elements in a vector
  795. * @param[out] index index of the maximum absolute value
  796. * @return Maximum absolute value
  797. */
  798. static inline q31_t hpm_dsp_absmax_q31(const q31_t* src, uint32_t size, uint32_t* index)
  799. {
  800. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  801. return riscv_dsp_absmax_q31(src, size, index);
  802. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  803. }
  804. /**
  805. * @brief Maximum absolute value of the q7 vector.
  806. * @param[in] src pointer of the input vector
  807. * @param[in] size number of elements in a vector
  808. * @param[out] index index of the maximum absolute value
  809. * @return Maximum absolute value
  810. */
  811. static inline q7_t hpm_dsp_absmax_q7(const q7_t* src, uint32_t size, uint32_t* index)
  812. {
  813. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  814. return riscv_dsp_absmax_q7(src, size, index);
  815. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  816. }
  817. /**
  818. * @brief Minimum absolute value of the floating-point vector.
  819. * @param[in] src pointer of the input vector
  820. * @param[in] size number of elements in a vector
  821. * @param[out] index index of the minimum absolute value
  822. * @return Minimum absolute value
  823. */
  824. static inline float32_t hpm_dsp_absmin_f32(const float32_t* src, uint32_t size, uint32_t* index)
  825. {
  826. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  827. return riscv_dsp_absmin_f32(src, size, index);
  828. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  829. }
  830. /**
  831. * @brief Minimum absolute value of the q31 vector.
  832. * @param[in] src pointer of the input vector
  833. * @param[in] size number of elements in a vector
  834. * @param[out] index index of the minimum absolute value
  835. * @return Minimum absolute value
  836. */
  837. static inline q31_t hpm_dsp_absmin_q31(const q31_t* src, uint32_t size, uint32_t* index)
  838. {
  839. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  840. return riscv_dsp_absmin_q31(src, size, index);
  841. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  842. }
  843. /**
  844. * @brief Minimum absolute value of the q15 vector.
  845. * @param[in] src pointer of the input vector
  846. * @param[in] size number of elements in a vector
  847. * @param[out] index index of the minimum absolute value
  848. * @return Minimum absolute value
  849. */
  850. static inline q15_t hpm_dsp_absmin_q15(const q15_t* src, uint32_t size, uint32_t* index)
  851. {
  852. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  853. return riscv_dsp_absmin_q15(src, size, index);
  854. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  855. }
  856. /**
  857. * @brief Minimum absolute value of the q7 vector.
  858. * @param[in] src pointer of the input vector
  859. * @param[in] size number of elements in a vector
  860. * @param[out] index index of the minimum absolute value
  861. * @return Minimum absolute value
  862. */
  863. static inline q7_t hpm_dsp_absmin_q7(const q7_t* src, uint32_t size, uint32_t* index)
  864. {
  865. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  866. return riscv_dsp_absmin_q7(src, size, index);
  867. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  868. }
  869. #endif
  870. #endif
  871. /**
  872. * @}
  873. *
  874. */
  875. #ifdef HPM_MATH_DSP_BASIC
  876. /**
  877. * @defgroup basic DSP Basic Functions
  878. * @ingroup hpmmath
  879. * @{
  880. */
  881. #ifdef HPM_EN_MATH_DSP_LIB
  882. #ifdef __zcc__
  883. #include "tpt_math.h"
  884. #endif
  885. #include "riscv_dsp_basic_math.h"
  886. // Absolute value
  887. /**
  888. * @brief Absolute value of floating-point vectors.
  889. * @param[in] *src points to the input vector.
  890. * @param[out] *dst points to the output vector.
  891. * @param[in] size size of the vectors.
  892. */
  893. static inline void hpm_dsp_abs_f32(float32_t *src, float32_t *dst, uint32_t size)
  894. {
  895. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  896. #ifdef __zcc__
  897. tpt_abs_f32(dst, src, size); /* tpt API: destination first */
  898. #else /* !__zcc__ */
  899. riscv_dsp_abs_f32(src, dst, size);
  900. #endif /* __zcc__ */
  901. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  902. }
  903. /**
  904. * @brief Absolute value of q31 vectors.
  905. * @param[in] *src points to the input vector.
  906. * @param[out] *dst points to the output vector.
  907. * @param[in] size size of the vectors.
  908. *
  909. * The Q31 value INT32_MIN (0x80000000) will be saturated to the maximum
  910. * allowable positive value INT32_MAX.
  911. */
  912. static inline void hpm_dsp_abs_q31(q31_t *src, q31_t *dst, uint32_t size)
  913. {
  914. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  915. #ifdef __zcc__
  916. tpt_abs_q31(dst, src, size); /* tpt API: destination first */
  917. #else /* !__zcc__ */
  918. riscv_dsp_abs_q31(src, dst, size);
  919. #endif /* __zcc__ */
  920. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  921. }
  922. /**
  923. * @brief Absolute value of q15 vectors.
  924. * @param[in] *src points to the input vector.
  925. * @param[out] *dst points to the output vector.
  926. * @param[in] size size of the vectors.
  927. *
  928. * The Q15 value INT16_MIN (0x8000) will be saturated to the maximum
  929. * allowable positive value INT16_MAX.
  930. */
  931. static inline void hpm_dsp_abs_q15(q15_t *src, q15_t *dst, uint32_t size)
  932. {
  933. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  934. #ifdef __zcc__
  935. tpt_abs_q15(dst, src, size); /* tpt API: destination first */
  936. #else /* !__zcc__ */
  937. riscv_dsp_abs_q15(src, dst, size);
  938. #endif /* __zcc__ */
  939. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  940. }
  941. /**
  942. * @brief Absolute value of q7 vectors.
  943. * @param[in] *src points to the input vector.
  944. * @param[out] *dst points to the output vector.
  945. * @param[in] size size of the vectors.
  946. *
  947. * The Q7 value INT8_MIN (0x80) will be saturated to the maximum
  948. * allowable positive value INT8_MAX.
  949. */
  950. static inline void hpm_dsp_abs_q7(q7_t *src, q7_t *dst, uint32_t size)
  951. {
  952. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  953. #ifdef __zcc__
  954. tpt_abs_q7(dst, src, size); /* tpt API: destination first */
  955. #else /* !__zcc__ */
  956. riscv_dsp_abs_q7(src, dst, size);
  957. #endif /* __zcc__ */
  958. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  959. }
  960. // Addition
  961. /**
  962. * @brief Addition of floating-point vectors.
  963. * @param[in] *src1 points to the first input vector.
  964. * @param[in] *src2 points to the second input vector.
  965. * @param[out] *dst points to the output vector.
  966. * @param[in] size size of the vectors.
  967. */
  968. static inline void hpm_dsp_add_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
  969. {
  970. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  971. #ifdef __zcc__
  972. tpt_add_f32(dst, src1, src2, size); /* tpt API: destination first */
  973. #else /* !__zcc__ */
  974. riscv_dsp_add_f32(src1, src2, dst, size);
  975. #endif /* __zcc__ */
  976. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  977. }
  978. /**
  979. * @brief Addition of q31 vectors.
  980. * @param[in] *src1 points to the first input vector.
  981. * @param[in] *src2 points to the second input vector.
  982. * @param[out] *dst points to the output vector.
  983. * @param[in] size size of the vectors.
  984. *
  985. * Output results will be saturated in Q31 range [0x80000000 0x7FFFFFFF].
  986. */
  987. static inline void hpm_dsp_add_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
  988. {
  989. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  990. #ifdef __zcc__
  991. tpt_add_q31(dst, src1, src2, size); /* tpt API: destination first */
  992. #else /* !__zcc__ */
  993. riscv_dsp_add_q31(src1, src2, dst, size);
  994. #endif /* __zcc__ */
  995. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  996. }
  997. /**
  998. * @brief Addition of q15 vectors.
  999. * @param[in] *src1 points to the first input vector.
  1000. * @param[in] *src2 points to the second input vector.
  1001. * @param[out] *dst points to the output vector.
  1002. * @param[in] size size of the vectors.
  1003. *
  1004. * The output results will be saturated in Q15 range [0x8000 0x7FFF].
  1005. */
  1006. static inline void hpm_dsp_add_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
  1007. {
  1008. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1009. #ifdef __zcc__
  1010. tpt_add_q15(dst, src1, src2, size); /* tpt API: destination first */
  1011. #else /* !__zcc__ */
  1012. riscv_dsp_add_q15(src1, src2, dst, size);
  1013. #endif /* __zcc__ */
  1014. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1015. }
  1016. /**
  1017. * @brief Addition of q7 vectors.
  1018. * @param[in] *src1 points to the first input vector.
  1019. * @param[in] *src2 points to the second input vector.
  1020. * @param[out] *dst points to the output vector.
  1021. * @param[in] size size of the vectors.
  1022. *
  1023. * Output results will be saturated in Q7 range [0x80 0x7F].
  1024. */
  1025. static inline void hpm_dsp_add_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
  1026. {
  1027. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1028. #ifdef __zcc__
  1029. tpt_add_q7(dst, src1, src2, size); /* tpt API: destination first */
  1030. #else /* !__zcc__ */
  1031. riscv_dsp_add_q7(src1, src2, dst, size);
  1032. #endif /* __zcc__ */
  1033. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1034. }
  1035. /**
  1036. * @brief Addition of U8 vectors.
  1037. * @param[in] *src1 points to the first input vector.
  1038. * @param[in] *src2 points to the second input vector.
  1039. * @param[out] *dst points to the output vector.
  1040. * @param[in] size size of the vectors.
  1041. *
  1042. * Output results will be saturated in U16 range [0x0000 0xFFFF].
  1043. */
  1044. static inline void hpm_dsp_add_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
  1045. {
  1046. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1047. #ifdef __zcc__
  1048. tpt_add_u8_u16(dst, src1, src2, size); /* tpt API: destination first */
  1049. #else /* !__zcc__ */
  1050. riscv_dsp_add_u8_u16(src1, src2, dst, size);
  1051. #endif /* __zcc__ */
  1052. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1053. }
  1054. // Subtraction
  1055. /**
  1056. * @brief Subtraction of floating-point vectors.
  1057. * @param[in] *src1 points to the first input vector.
  1058. * @param[in] *src2 points to the second input vector.
  1059. * @param[out] *dst points to the output vector.
  1060. * @param[in] size size of the vectors.
  1061. */
  1062. static inline void hpm_dsp_sub_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
  1063. {
  1064. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1065. #ifdef __zcc__
  1066. tpt_sub_f32(dst, src1, src2, size); /* tpt API: destination first */
  1067. #else /* !__zcc__ */
  1068. riscv_dsp_sub_f32(src1, src2, dst, size);
  1069. #endif /* __zcc__ */
  1070. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1071. }
  1072. /**
  1073. * @brief Subtraction of q31 vectors.
  1074. * @param[in] *src1 points to the first input vector.
  1075. * @param[in] *src2 points to the second input vector.
  1076. * @param[out] *dst points to the output vector.
  1077. * @param[in] size size of the vectors.
  1078. *
  1079. * Output results will be saturated in Q31 range [0x80000000 0x7FFFFFFF].
  1080. */
  1081. static inline void hpm_dsp_sub_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
  1082. {
  1083. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1084. #ifdef __zcc__
  1085. tpt_sub_q31(dst, src1, src2, size); /* tpt API: destination first */
  1086. #else /* !__zcc__ */
  1087. riscv_dsp_sub_q31(src1, src2, dst, size);
  1088. #endif /* __zcc__ */
  1089. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1090. }
  1091. /**
  1092. * @brief Subtraction of q15 vectors.
  1093. * @param[in] *src1 points to the first input vector.
  1094. * @param[in] *src2 points to the second input vector.
  1095. * @param[out] *dst points to the output vector.
  1096. * @param[in] size size of the vectors.
  1097. *
  1098. * The output results will be saturated in Q15 range [0x8000 0x7FFF].
  1099. */
  1100. static inline void hpm_dsp_sub_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
  1101. {
  1102. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1103. #ifdef __zcc__
  1104. tpt_sub_q15(dst, src1, src2, size); /* tpt API: destination first */
  1105. #else /* !__zcc__ */
  1106. riscv_dsp_sub_q15(src1, src2, dst, size);
  1107. #endif /* __zcc__ */
  1108. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1109. }
  1110. /**
  1111. * @brief Subtraction of q7 vectors.
  1112. * @param[in] *src1 points to the first input vector.
  1113. * @param[in] *src2 points to the second input vector.
  1114. * @param[out] *dst points to the output vector.
  1115. * @param[in] size size of the vectors.
  1116. *
  1117. * Output results will be saturated in Q7 range [0x80 0x7F].
  1118. */
  1119. static inline void hpm_dsp_sub_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
  1120. {
  1121. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1122. #ifdef __zcc__
  1123. tpt_sub_q7(dst, src1, src2, size); /* tpt API: destination first */
  1124. #else /* !__zcc__ */
  1125. riscv_dsp_sub_q7(src1, src2, dst, size);
  1126. #endif /* __zcc__ */
  1127. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1128. }
  1129. /**
  1130. * @brief Subtraction of u8 vectors.
  1131. * @param[in] *src1 points to the first input vector.
  1132. * @param[in] *src2 points to the second input vector.
  1133. * @param[out] *dst points to the output vector.
  1134. * @param[in] size size of the vectors.
  1135. *
  1136. * Output results will be saturated in Q7 range [0x80 0x7F].
  1137. */
  1138. static inline void hpm_dsp_sub_u8_q7(uint8_t *src1, uint8_t *src2, q7_t *dst, uint32_t size)
  1139. {
  1140. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1141. riscv_dsp_sub_u8_q7(src1, src2, dst, size); /* no __zcc__ (tpt) variant is used here */
  1142. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1143. }
  1144. // Multiplication
  1145. /**
  1146. * @brief Multiplication of floating-point vectors.
  1147. * @param[in] *src1 points to the first input vector.
  1148. * @param[in] *src2 points to the second input vector.
  1149. * @param[out] *dst points to the output vector.
  1150. * @param[in] size size of the vectors.
  1151. */
  1152. static inline void hpm_dsp_mul_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
  1153. {
  1154. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1155. #ifdef __zcc__
  1156. tpt_mult_f32(dst, src1, src2, size); /* tpt API: destination first */
  1157. #else /* !__zcc__ */
  1158. riscv_dsp_mul_f32(src1, src2, dst, size);
  1159. #endif /* __zcc__ */
  1160. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1161. }
  1162. /**
  1163. * @brief Multiplication of q31 vectors.
  1164. * @param[in] *src1 points to the first input vector.
  1165. * @param[in] *src2 points to the second input vector.
  1166. * @param[out] *dst points to the output vector.
  1167. * @param[in] size size of the vectors.
  1168. *
  1169. * Output results will be saturated in Q31 range [0x80000000 0x7FFFFFFF].
  1170. */
  1171. static inline void hpm_dsp_mul_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
  1172. {
  1173. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1174. #ifdef __zcc__
  1175. tpt_mult_q31(dst, src1, src2, size); /* tpt API: destination first */
  1176. #else /* !__zcc__ */
  1177. riscv_dsp_mul_q31(src1, src2, dst, size);
  1178. #endif /* __zcc__ */
  1179. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1180. }
  1181. /**
  1182. * @brief Multiplication of q15 vectors.
  1183. * @param[in] *src1 points to the first input vector.
  1184. * @param[in] *src2 points to the second input vector.
  1185. * @param[out] *dst points to the output vector.
  1186. * @param[in] size size of the vectors.
  1187. *
  1188. * Output results will be saturated in Q15 range [0x8000 0x7FFF].
  1189. */
  1190. static inline void hpm_dsp_mul_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
  1191. {
  1192. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1193. #ifdef __zcc__
  1194. tpt_mult_q15(dst, src1, src2, size); /* tpt API: destination first */
  1195. #else /* !__zcc__ */
  1196. riscv_dsp_mul_q15(src1, src2, dst, size);
  1197. #endif /* __zcc__ */
  1198. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1199. }
  1200. /**
  1201. * @brief Multiplication of q7 vectors.
  1202. * @param[in] *src1 points to the first input vector.
  1203. * @param[in] *src2 points to the second input vector.
  1204. * @param[out] *dst points to the output vector.
  1205. * @param[in] size size of the vectors.
  1206. *
  1207. * Output results will be saturated in Q7 range [0x80 0x7F].
  1208. */
  1209. static inline void hpm_dsp_mul_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
  1210. {
  1211. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1212. #ifdef __zcc__
  1213. tpt_mult_q7(dst, src1, src2, size); /* tpt API: destination first */
  1214. #else /* !__zcc__ */
  1215. riscv_dsp_mul_q7(src1, src2, dst, size);
  1216. #endif /* __zcc__ */
  1217. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1218. }
  1219. /**
  1220. * @brief Multiplication of u8 vectors.
  1221. * @param[in] *src1 points to the first input vector.
  1222. * @param[in] *src2 points to the second input vector.
  1223. * @param[out] *dst points to the output vector.
  1224. * @param[in] size size of the vectors.
  1225. *
  1226. * Output results will be in U16 range [0x0000 0xFFFF].
  1227. */
  1228. static inline void hpm_dsp_mul_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
  1229. {
  1230. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1231. riscv_dsp_mul_u8_u16(src1, src2, dst, size); /* no __zcc__ (tpt) variant is used here */
  1232. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1233. }
  1234. // Division
  1235. /**
  1236. * @brief Element-wise division of floating-point vectors.
  1237. * @param[in] *src1 points to the first input vector.
  1238. * @param[in] *src2 points to the second input vector.
  1239. * @param[out] *dst points to the output vector.
  1240. * @param[in] size size of the vectors.
  1241. */
  1242. static inline void hpm_dsp_div_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
  1243. {
  1244. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1245. #ifdef __zcc__
  1246. tpt_div_f32(dst, src1, src2, size); /* tpt API: destination first */
  1247. #else /* !__zcc__ */
  1248. riscv_dsp_div_f32(src1, src2, dst, size);
  1249. #endif /* __zcc__ */
  1250. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1251. }
  1252. /**
  1253. * @brief Division of two q31 inputs.
  1254. * @param[in] src1 the smaller input value.
  1255. * @param[in] src2 the larger input value.
  1256. * @return division of the two inputs.
  1257. */
  1258. static inline q31_t hpm_dsp_div_q31(q31_t src1, q31_t src2)
  1259. {
  1260. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1261. #ifdef __zcc__
  1262. return tpt_div_q31(src1, src2);
  1263. #else /* !__zcc__ */
  1264. return riscv_dsp_div_q31(src1, src2);
  1265. #endif /* __zcc__ */
  1266. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1267. }
  1268. /**
  1269. * @brief Division of a q63 input by a positive 32-bit value.
  1270. * @param[in] src1 the q63 input value.
  1271. * @param[in] src2 the positive 32-bit input value.
  1272. * @return division of the two inputs.
  1273. */
  1274. static inline q31_t hpm_dsp_div_s64_u32(q63_t src1, uint32_t src2)
  1275. {
  1276. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1277. #ifdef __zcc__
  1278. return tpt_div_s64_u32(src1, src2);
  1279. #else /* !__zcc__ */
  1280. return riscv_dsp_div_s64_u32(src1, src2);
  1281. #endif /* __zcc__ */
  1282. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1283. }
  1284. /**
  1285. * @brief Division of a positive 64-bit input by a positive 32-bit value.
  1286. * @param[in] src1 the positive 64-bit input value.
  1287. * @param[in] src2 the positive 32-bit input value.
  1288. * @return division of the two inputs.
  1289. */
  1290. static inline q31_t hpm_dsp_div_u64_u32(uint64_t src1, uint32_t src2)
  1291. {
  1292. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1293. #ifdef __zcc__
  1294. return tpt_div_u64_u32(src1, src2);
  1295. #else /* !__zcc__ */
  1296. return riscv_dsp_div_u64_u32(src1, src2);
  1297. #endif /* __zcc__ */
  1298. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1299. }
  1300. // Negation
  1301. /**
  1302. * @brief Negation of floating-point vectors.
  1303. * @param[in] *src points to the input vector.
  1304. * @param[out] *dst points to the output vector.
  1305. * @param[in] size size of the vectors.
  1306. */
  1307. static inline void hpm_dsp_neg_f32(float32_t *src, float32_t *dst, uint32_t size)
  1308. {
  1309. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1310. #ifdef __zcc__
  1311. tpt_negate_f32(dst, src, size); /* tpt API: destination first */
  1312. #else /* !__zcc__ */
  1313. riscv_dsp_neg_f32(src, dst, size);
  1314. #endif /* __zcc__ */
  1315. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1316. }
  1317. /**
  1318. * @brief Negation of q31 vectors.
  1319. * @param[in] *src points to the input vector.
  1320. * @param[out] *dst points to the output vector.
  1321. * @param[in] size size of the vectors.
  1322. *
  1323. * The Q31 value INT32_MIN (0x80000000) will be saturated to the maximum
  1324. * allowable positive value INT32_MAX.
  1325. */
  1326. static inline void hpm_dsp_neg_q31(q31_t *src, q31_t *dst, uint32_t size)
  1327. {
  1328. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1329. #ifdef __zcc__
  1330. tpt_negate_q31(dst, src, size); /* tpt API: destination first */
  1331. #else /* !__zcc__ */
  1332. riscv_dsp_neg_q31(src, dst, size);
  1333. #endif /* __zcc__ */
  1334. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1335. }
  1336. /**
  1337. * @brief Negation of q15 vectors.
  1338. * @param[in] *src points to the input vector.
  1339. * @param[out] *dst points to the output vector.
  1340. * @param[in] size size of the vectors.
  1341. *
  1342. * The Q15 value INT16_MIN (0x8000) will be saturated to the maximum
  1343. * allowable positive value INT16_MAX.
  1344. */
  1345. static inline void hpm_dsp_neg_q15(q15_t *src, q15_t *dst, uint32_t size)
  1346. {
  1347. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1348. #ifdef __zcc__
  1349. tpt_negate_q15(dst, src, size); /* tpt API: destination first */
  1350. #else /* !__zcc__ */
  1351. riscv_dsp_neg_q15(src, dst, size);
  1352. #endif /* __zcc__ */
  1353. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1354. }
  1355. /**
  1356. * @brief Negation of q7 vectors.
  1357. * @param[in] *src points to the input vector.
  1358. * @param[out] *dst points to the output vector.
  1359. * @param[in] size size of the vectors.
  1360. *
  1361. * The Q7 value INT8_MIN (0x80) will be saturated to the maximum allowable
  1362. * positive value INT8_MAX.
  1363. */
  1364. static inline void hpm_dsp_neg_q7(q7_t *src, q7_t *dst, uint32_t size)
  1365. {
  1366. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1367. #ifdef __zcc__
  1368. tpt_negate_q7(dst, src, size); /* tpt API: destination first */
  1369. #else /* !__zcc__ */
  1370. riscv_dsp_neg_q7(src, dst, size);
  1371. #endif /* __zcc__ */
  1372. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1373. }
  1374. // Dot product
  1375. /**
  1376. * @brief Dot product of floating-point vectors.
  1377. * @param[in] *src1 points to the first input vector.
  1378. * @param[in] *src2 points to the second input vector.
  1379. * @param[in] size size of the vectors.
  1380. * @return dot product of two input vectors.
  1381. */
  1382. static inline float32_t hpm_dsp_dprod_f32(float32_t *src1, float32_t *src2, uint32_t size)
  1383. {
  1384. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1385. #ifdef __zcc__
  1386. f32_t res;
  1387. tpt_dot_prod_f32(&res, src1, src2, size); /* tpt API delivers the result through its first argument */
  1388. return res;
  1389. #else /* !__zcc__ */
  1390. return riscv_dsp_dprod_f32(src1, src2, size);
  1391. #endif /* __zcc__ */
  1392. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1393. }
  1394. /**
  1395. * @brief Dot product of q31 vectors.
  1396. * @param[in] *src1 points to the first input vector.
  1397. * @param[in] *src2 points to the second input vector.
  1398. * @param[in] size size of the vectors.
  1399. * @return dot product of two input vectors.
  1400. *
  1401. * The output of multiplications is truncated from 2.62 to 2.48 format and
  1402. * then added without saturation to a 64-bit accumulator. The return value
  1403. * is in 16.48 format. When the size of the vectors is less than 2^16, there
  1404. * is no risk of overflow.
  1405. */
  1406. static inline q63_t hpm_dsp_dprod_q31(q31_t *src1, q31_t *src2, uint32_t size)
  1407. {
  1408. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1409. #ifdef __zcc__
  1410. q63_t res;
  1411. tpt_dot_prod_q31(&res, src1, src2, size); /* tpt API delivers the result through its first argument */
  1412. return res;
  1413. #else /* !__zcc__ */
  1414. return riscv_dsp_dprod_q31(src1, src2, size);
  1415. #endif /* __zcc__ */
  1416. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1417. }
  1418. /**
  1419. * @brief Dot product of q15 vectors.
  1420. * @param[in] *src1 points to the first input vector.
  1421. * @param[in] *src2 points to the second input vector.
  1422. * @param[in] size size of the vectors.
  1423. * @return dot product of two input vectors.
  1424. *
  1425. * The output of multiplications is in 2.30 format and then added to a
  1426. * 64-bit accumulator in 34.30 format. The return value is in 34.30 format.
  1427. */
  1428. static inline q63_t hpm_dsp_dprod_q15(q15_t *src1, q15_t *src2, uint32_t size)
  1429. {
  1430. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1431. #ifdef __zcc__
  1432. q63_t res;
  1433. tpt_dot_prod_q15(&res, src1, src2, size); /* tpt API delivers the result through its first argument */
  1434. return res;
  1435. #else /* !__zcc__ */
  1436. return riscv_dsp_dprod_q15(src1, src2, size);
  1437. #endif /* __zcc__ */
  1438. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1439. }
  1440. /**
  1441. * @brief Dot product of u8 * q15 vectors.
  1442. * @param[in] *src1 points to the uint8_t format input vector.
  1443. * @param[in] *src2 points to the q15 format input vector.
  1444. * @param[in] size size of the vectors.
  1445. * @return dot product of two input vectors.
  1446. *
  1447. * The output of multiplications is in 1.23 format and then added to an
  1448. * accumulator in 9.23 format. The return result is in 9.23 format.
  1449. */
  1450. static inline q31_t hpm_dsp_dprod_u8xq15(uint8_t *src1, q15_t *src2, uint32_t size)
  1451. {
  1452. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1453. return riscv_dsp_dprod_u8xq15(src1, src2, size); /* no __zcc__ (tpt) variant is used here */
  1454. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1455. }
  1456. /**
  1457. * @brief Dot product of q7 vectors.
  1458. * @param[in] *src1 points to the first input vector.
  1459. * @param[in] *src2 points to the second input vector.
  1460. * @param[in] size size of the vectors.
  1461. * @return dot product of two input vectors.
  1462. *
  1463. * The output of multiplications is in 2.14 format and then added to an
  1464. * accumulator in 18.14 format. The return result is in 18.14 format.
  1465. */
  1466. static inline q31_t hpm_dsp_dprod_q7(q7_t *src1, q7_t *src2, uint32_t size)
  1467. {
  1468. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1469. #ifdef __zcc__
  1470. q31_t res;
  1471. tpt_dot_prod_q7(&res, src1, src2, size); /* tpt API delivers the result through its first argument */
  1472. return res;
  1473. #else /* !__zcc__ */
  1474. return riscv_dsp_dprod_q7(src1, src2, size);
  1475. #endif /* __zcc__ */
  1476. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1477. }
  1478. /**
  1479. * @brief Dot product of q7 * q15 vectors.
  1480. * @param[in] *src1 points to the q7_t format input vector.
  1481. * @param[in] *src2 points to the q15 format input vector.
  1482. * @param[in] size size of the vectors.
  1483. * @return dot product of two input vectors.
  1484. *
  1485. * The output of multiplications is in 1.22 format and then added to an
  1486. * accumulator in 10.22 format. The return result is in 10.22 format.
  1487. */
  1488. static inline q31_t hpm_dsp_dprod_q7xq15(q7_t *src1, q15_t *src2, uint32_t size)
  1489. {
  1490. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1491. return riscv_dsp_dprod_q7xq15(src1, src2, size); /* no __zcc__ (tpt) variant is used here */
  1492. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1493. }
  1494. /**
  1495. * @brief Dot product of U8 vectors.
  1496. * @param[in] *src1 points to the first input vector.
  1497. * @param[in] *src2 points to the second input vector.
  1498. * @param[in] size size of the vectors.
  1499. * @return dot product of two input vectors.
  1500. *
  1501. * The output of multiplications is in 0.16 format and then added to an
  1502. * accumulator in 16.16 format. The return result is in 16.16 format.
  1503. */
  1504. static inline uint32_t hpm_dsp_dprod_u8(uint8_t *src1, uint8_t *src2, uint32_t size)
  1505. {
  1506. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1507. return riscv_dsp_dprod_u8(src1, src2, size); /* no __zcc__ (tpt) variant is used here */
  1508. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32; NOTE(review): nothing is returned for other cores */
  1509. }
  1510. // Offset
  1511. /**
  1512. * @brief Add a constant offset to a floating-point vector.
  1513. * @param[in] *src points to the input vector.
  1514. * @param[in] offset is the value to be added.
  1515. * @param[out] *dst points to the output vector.
  1516. * @param[in] size size of the vectors.
  1517. */
  1518. static inline void hpm_dsp_offset_f32(float32_t *src, float32_t offset, float32_t *dst, uint32_t size)
  1519. {
  1520. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1521. #ifdef __zcc__
  1522. tpt_offset_f32(dst, src, offset, size); /* tpt API: destination first */
  1523. #else /* !__zcc__ */
  1524. riscv_dsp_offset_f32(src, offset, dst, size);
  1525. #endif /* __zcc__ */
  1526. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1527. }
  1528. /**
  1529. * @brief Add a constant offset to a q31 vector.
  1530. * @param[in] *src points to the input vector.
  1531. * @param[in] offset is the value to be added.
  1532. * @param[out] *dst points to the output vector.
  1533. * @param[in] size size of the vectors.
  1534. *
  1535. * Output results are saturated in Q31 range [0x80000000 0x7FFFFFFF].
  1536. */
  1537. static inline void hpm_dsp_offset_q31(q31_t *src, q31_t offset, q31_t *dst, uint32_t size)
  1538. {
  1539. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1540. #ifdef __zcc__
  1541. tpt_offset_q31(dst, src, offset, size); /* tpt API: destination first */
  1542. #else /* !__zcc__ */
  1543. riscv_dsp_offset_q31(src, offset, dst, size);
  1544. #endif /* __zcc__ */
  1545. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1546. }
  1547. /**
  1548. * @brief Add a constant offset to a q15 vector.
  1549. * @param[in] *src points to the input vector.
  1550. * @param[in] offset is the value to be added.
  1551. * @param[out] *dst points to the output vector.
  1552. * @param[in] size size of the vectors.
  1553. *
  1554. * Output results are saturated in Q15 range [0x8000 0x7FFF].
  1555. */
  1556. static inline void hpm_dsp_offset_q15(q15_t *src, q15_t offset, q15_t *dst, uint32_t size)
  1557. {
  1558. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1559. #ifdef __zcc__
  1560. tpt_offset_q15(dst, src, offset, size); /* tpt API: destination first */
  1561. #else /* !__zcc__ */
  1562. riscv_dsp_offset_q15(src, offset, dst, size);
  1563. #endif /* __zcc__ */
  1564. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1565. }
  1566. /**
  1567. * @brief Add a constant offset to a q7 vector.
  1568. * @param[in] *src points to the input vector.
  1569. * @param[in] offset is the value to be added.
  1570. * @param[out] *dst points to the output vector.
  1571. * @param[in] size size of the vectors.
  1572. *
  1573. * Output results are saturated in Q7 range [0x80 0x7F].
  1574. */
  1575. static inline void hpm_dsp_offset_q7(q7_t *src, q7_t offset, q7_t *dst, uint32_t size)
  1576. {
  1577. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1578. #ifdef __zcc__
  1579. tpt_offset_q7(dst, src, offset, size); /* tpt API: destination first */
  1580. #else /* !__zcc__ */
  1581. riscv_dsp_offset_q7(src, offset, dst, size);
  1582. #endif /* __zcc__ */
  1583. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1584. }
  1585. /**
  1586. * @brief Add a constant q7 offset to a U8 vector.
  1587. * @param[in] *src points to the input vector.
  1588. * @param[in] offset is the value to be added.
  1589. * @param[out] *dst points to the output vector.
  1590. * @param[in] size size of the vectors.
  1591. *
  1592. * Output results are saturated in U8 range [0x00 0xFF].
  1593. */
  1594. static inline void hpm_dsp_offset_u8(uint8_t *src, q7_t offset, uint8_t *dst, uint32_t size)
  1595. {
  1596. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1597. riscv_dsp_offset_u8(src, offset, dst, size); /* no __zcc__ (tpt) variant is used here */
  1598. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1599. }
  1600. // Scale
  1601. /**
  1602. * @brief Multiply a floating-point vector by a floating-point scale.
  1603. * @param[in] *src points to the input vector.
  1604. * @param[in] scale is the value to be multiplied.
  1605. * @param[out] *dst points to the output vector.
  1606. * @param[in] size size of the vectors.
  1607. */
  1608. static inline void hpm_dsp_scale_f32(float32_t *src, float32_t scale, float32_t *dst, uint32_t size)
  1609. {
  1610. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1611. #ifdef __zcc__
  1612. tpt_scale_f32(dst, src, scale, size); /* tpt API: destination first */
  1613. #else /* !__zcc__ */
  1614. riscv_dsp_scale_f32(src, scale, dst, size);
  1615. #endif /* __zcc__ */
  1616. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1617. }
  1618. /**
  1619. * @brief Multiply a q31 vector by a q31 scale.
  1620. * @param[in] *src points to the input vector.
  1621. * @param[in] scalefract is the fractional portion value
  1622. * to be multiplied.
  1623. * @param[in] shift number of bits to shift.
  1624. * @param[out] *dst points to the output vector.
  1625. * @param[in] size size of the vectors.
  1626. *
  1627. * The inputs are multiplied to yield a 2.62 output, which is then shifted
  1628. * with saturation to 1.31 format.
  1629. */
  1630. static inline void hpm_dsp_scale_q31(q31_t *src, q31_t scalefract, int8_t shift, q31_t *dst, uint32_t size)
  1631. {
  1632. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1633. #ifdef __zcc__
  1634. tpt_scale_q31(dst, src, scalefract, shift, size); /* tpt API: destination first */
  1635. #else /* !__zcc__ */
  1636. riscv_dsp_scale_q31(src, scalefract, shift, dst, size);
  1637. #endif /* __zcc__ */
  1638. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1639. }
  1640. /**
  1641. * @brief Multiply a q15 vector by a q15 scale.
  1642. * @param[in] *src points to the input vector.
  1643. * @param[in] scalefract is the fractional portion value
  1644. * to be multiplied.
  1645. * @param[in] shift number of bits to shift.
  1646. * @param[out] *dst points to the output vector.
  1647. * @param[in] size size of the vectors.
  1648. *
  1649. * The inputs are multiplied to yield a 2.30 output, which is then shifted
  1650. * with saturation to 1.15 format.
  1651. */
  1652. static inline void hpm_dsp_scale_q15(q15_t *src, q15_t scalefract, int8_t shift, q15_t *dst, uint32_t size)
  1653. {
  1654. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1655. #ifdef __zcc__
  1656. tpt_scale_q15(dst, src, scalefract, shift, size); /* tpt API: destination first */
  1657. #else /* !__zcc__ */
  1658. riscv_dsp_scale_q15(src, scalefract, shift, dst, size);
  1659. #endif /* __zcc__ */
  1660. #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  1661. }
  1662. /**
  1663. * @brief To multiply a q7 vectors by a q7 scale.
  1664. * @param[in] *src points to the input vector.
  1665. * @param[in] scalefract is the fractional portion value
  1666. * to be multiplied.
  1667. * @param[in] shift number of bits to shift.
  1668. * @param[out] *dst points to the output vector.
  1669. * @param[in] size size of the vectors.
  1670. *
  1671. * These are multiplied to yield a 2.14 output and then is shifted with
  1672. * saturation to 1.7 format.
  1673. */
  1674. static inline void hpm_dsp_scale_q7(q7_t *src, q7_t scalefract, int8_t shift, q7_t *dst, uint32_t size)
  1675. {
  1676. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1677. #ifdef __zcc__
  1678. tpt_scale_q7(dst, src, scalefract, shift, size);
  1679. #else
  1680. riscv_dsp_scale_q7(src, scalefract, shift, dst, size);
  1681. #endif
  1682. #endif
  1683. }
  1684. /**
  1685. * @brief To multiply a u8 vectors by a q7 scale.
  1686. * @param[in] *src points to the input vector.
  1687. * @param[in] scalefract: is the fractional portion value to be multiplied.
  1688. * @param[in] shift: number of bits to shift.
  1689. * @param[out] *dst points to the output vector.
  1690. * @param[in] size size of the vectors.
  1691. *
  1692. * The inputs are multiplied to yield a 1.15 output and then are shift with
  1693. * saturation to 8-bit formats.
  1694. */
  1695. static inline void hpm_dsp_scale_u8(uint8_t *src, q7_t scalefract, int8_t shift, uint8_t *dst, uint32_t size)
  1696. {
  1697. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1698. riscv_dsp_scale_u8(src, scalefract, shift, dst, size);
  1699. #endif
  1700. }
  1701. // Shift
  1702. /**
  1703. * @brief Shifts a q15 vector with a specified shift number.
  1704. * @param[in] *src the input vector.
  1705. * @param[in] shift number of shift bits. If (shift > 0) means shifts
  1706. * left; (shift < 0) means shifts right.
  1707. * @param[out] *dst the output vector.
  1708. * @param[in] size size of the vectors.
  1709. *
  1710. * The input and output are all saturated to q15 range [0x8000 0x7FFF].
  1711. */
  1712. static inline void hpm_dsp_shift_q15(q15_t *src, int8_t shift, q15_t *dst, uint32_t size)
  1713. {
  1714. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1715. #ifdef __zcc__
  1716. tpt_shift_q15(dst, src, shift, size);
  1717. #else
  1718. riscv_dsp_shift_q15(src, shift, dst, size);
  1719. #endif
  1720. #endif
  1721. }
  1722. /**
  1723. * @brief Shifts a q31 vector with a specified shift number.
  1724. * @param[in] *src the input vector.
  1725. * @param[in] shift number of shift bits. If (shift > 0) means shifts
  1726. * left; (shift < 0) means shifts right.
  1727. * @param[out] *dst the output vector.
  1728. * @param[in] size size of the vectors.
  1729. *
  1730. * The input and output are all saturated to q31 range [0x80000000 0x7FFFFFFF].
  1731. */
  1732. static inline void hpm_dsp_shift_q31(q31_t *src, int8_t shift, q31_t *dst, uint32_t size)
  1733. {
  1734. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1735. #ifdef __zcc__
  1736. tpt_shift_q31(dst, src, shift, size);
  1737. #else
  1738. riscv_dsp_shift_q31(src, shift, dst, size);
  1739. #endif
  1740. #endif
  1741. }
  1742. /**
  1743. * @brief Shifts a q7 vector with a specified shift number.
  1744. * @param[in] *src the input vector.
  1745. * @param[in] shift number of shift bits. If (shift > 0) means shifts
  1746. * left; (shift < 0) means shifts right.
  1747. * @param[out] *dst the output vector.
  1748. * @param[in] size size of the vectors.
  1749. *
  1750. * The input and output are all saturated to q7 range [0x80 0x7F].
  1751. */
  1752. static inline void hpm_dsp_shift_q7(q7_t *src, int8_t shift, q7_t *dst, uint32_t size)
  1753. {
  1754. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1755. #ifdef __zcc__
  1756. tpt_shift_q7(dst, src, shift, size);
  1757. #else
  1758. riscv_dsp_shift_q7(src, shift, dst, size);
  1759. #endif
  1760. #endif
  1761. }
  /**
   * @brief Shift every element of a u8 vector by a given number of bits.
   *
   * Forwards to riscv_dsp_shift_u8() when HPM_DSP_CORE equals
   * HPM_DSP_HW_NDS32; otherwise nothing is called. There is no separate
   * __zcc__ path for this function.
   *
   * @param[in]  *src   the input vector.
   * @param[in]  shift  number of bits to shift: (shift > 0) shifts left,
   *                    (shift < 0) shifts right.
   * @param[out] *dst   the output vector.
   * @param[in]  size   size of the vectors.
   *
   * The input and output are all saturated to u8 range [0x00 0xFF].
   */
  static inline void hpm_dsp_shift_u8(uint8_t *src, int8_t shift, uint8_t *dst, uint32_t size)
  {
  #if (HPM_DSP_CORE == HPM_DSP_HW_NDS32)
      riscv_dsp_shift_u8(src, shift, dst, size);
  #endif /* HPM_DSP_CORE == HPM_DSP_HW_NDS32 */
  }
  1778. /**
  1779. * @addtogroup basic_clip
  1780. * @{
  1781. */
  1782. /**
  1783. * @brief Elementwise clipping of f32 function.
  1784. * @param[in] *src pointer of the input vector
  1785. * @param[out] *dst pointer of the output vector
  1786. * @param[in] low lower bound.
  1787. * @param[in] high higher bound.
  1788. * @param[in] size number of elements in a vector
  1789. *
  1790. */
  1791. static inline void hpm_dsp_clip_f32(float32_t *src, float32_t *dst, float32_t low, float32_t high, uint32_t size)
  1792. {
  1793. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1794. #ifdef __zcc__
  1795. tpt_clip_f32(dst, src, low, high, size);
  1796. #else
  1797. riscv_dsp_clip_f32(src, dst, low, high, size);
  1798. #endif
  1799. #endif
  1800. }
  1801. /**
  1802. * @brief Elementwise clipping of q31 function.
  1803. * @param[in] *src pointer of the input vector
  1804. * @param[out] *dst pointer of the output vector
  1805. * @param[in] low lower bound.
  1806. * @param[in] high higher bound.
  1807. * @param[in] size number of elements in a vector
  1808. *
  1809. */
  1810. static inline void hpm_dsp_clip_q31(q31_t *src, q31_t *dst, q31_t low, q31_t high, uint32_t size)
  1811. {
  1812. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1813. #ifdef __zcc__
  1814. tpt_clip_q31(dst, src, low, high, size);
  1815. #else
  1816. riscv_dsp_clip_q31(src, dst, low, high, size);
  1817. #endif
  1818. #endif
  1819. }
  1820. /**
  1821. * @brief Elementwise clipping of q15 function.
  1822. * @param[in] *src pointer of the input vector
  1823. * @param[out] *dst pointer of the output vector
  1824. * @param[in] low lower bound.
  1825. * @param[in] high higher bound.
  1826. * @param[in] size number of elements in a vector
  1827. *
  1828. */
  1829. static inline void hpm_dsp_clip_q15(q15_t *src, q15_t *dst, q15_t low, q15_t high, uint32_t size)
  1830. {
  1831. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1832. #ifdef __zcc__
  1833. tpt_clip_q15(dst, src, low, high, size);
  1834. #else
  1835. riscv_dsp_clip_q15(src, dst, low, high, size);
  1836. #endif
  1837. #endif
  1838. }
  1839. /**
  1840. * @brief Elementwise clipping of q7 function.
  1841. * @param[in] *src pointer of the input vector
  1842. * @param[out] *dst pointer of the output vector
  1843. * @param[in] low lower bound.
  1844. * @param[in] high higher bound.
  1845. * @param[in] size number of elements in a vector
  1846. *
  1847. */
  1848. static inline void hpm_dsp_clip_q7(q7_t *src, q7_t *dst, q7_t low, q7_t high, uint32_t size)
  1849. {
  1850. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1851. #ifdef __zcc__
  1852. tpt_clip_q7(dst, src, low, high, size);
  1853. #else
  1854. riscv_dsp_clip_q7(src, dst, low, high, size);
  1855. #endif
  1856. #endif
  1857. }
  1858. /** @} basic_clip */
  1859. // AND
  /**
   * @defgroup basic_and Bitwise AND Functions
   * @brief Bitwise AND Functions
   *
   * Bitwise AND functions calculate the logical bitwise AND of two separate source vectors and write the results one-by-one into a destination vector.
   *
   * Andes DSP library supports distinct bitwise AND functions for U32, U16 and U8 data types. These functions are introduced in the subsections below.
   */
  1868. /**
  1869. * @addtogroup basic_and
  1870. * @{
  1871. */
  1872. /**
  1873. * @brief Compute the logical bitwise AND of two u32 vectors.
  1874. * @param[in] *src1 pointer of the first input vector
  1875. * @param[in] *src2 pointer of the second input vector
  1876. * @param[out] *dst pointer of the output vector
  1877. * @param[in] size number of elements in a vector
  1878. *
  1879. */
  1880. static inline void hpm_dsp_and_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
  1881. {
  1882. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1883. #ifdef __zcc__
  1884. tpt_and_32bit(dst, src1, src2, size);
  1885. #else
  1886. riscv_dsp_and_u32(src1, src2, dst, size);
  1887. #endif
  1888. #endif
  1889. }
  1890. /**
  1891. * @brief Compute the logical bitwise AND of two u8 vectors.
  1892. * @param[in] *src1 pointer of the first input vector
  1893. * @param[in] *src2 pointer of the second input vector
  1894. * @param[out] *dst pointer of the output vector
  1895. * @param[in] size number of elements in a vector
  1896. *
  1897. */
  1898. static inline void hpm_dsp_and_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
  1899. {
  1900. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1901. #ifdef __zcc__
  1902. tpt_and_8bit(dst, src1, src2, size);
  1903. #else
  1904. riscv_dsp_and_u8(src1, src2, dst, size);
  1905. #endif
  1906. #endif
  1907. }
  1908. /** @} basic_and */
  1909. // OR
  /**
   * @defgroup basic_or Bitwise Inclusive OR Functions
   * @brief Bitwise Inclusive OR Functions
   *
   * Bitwise inclusive OR functions calculate the logical bitwise OR of two separate source vectors and write the results one-by-one into a destination vector.
   *
   * Andes DSP library supports distinct bitwise inclusive OR functions for U32, U16 and U8 data types. These functions are introduced in the subsections below.
   */
  1918. /**
  1919. * @addtogroup basic_or
  1920. * @{
  1921. */
  1922. /**
  1923. * @brief Compute the logical bitwise OR of two u32 vectors.
  1924. * @param[in] *src1 pointer of the first input vector
  1925. * @param[in] *src2 pointer of the second input vector
  1926. * @param[out] *dst pointer of the output vector
  1927. * @param[in] size number of elements in a vector
  1928. *
  1929. */
  1930. static inline void hpm_dsp_or_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
  1931. {
  1932. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1933. #ifdef __zcc__
  1934. tpt_or_32bit(dst, src1, src2, size);
  1935. #else
  1936. riscv_dsp_or_u32(src1, src2, dst, size);
  1937. #endif
  1938. #endif
  1939. }
  1940. /**
  1941. * @brief Compute the logical bitwise OR of two u16 vectors.
  1942. * @param[in] *src1 pointer of the first input vector
  1943. * @param[in] *src2 pointer of the second input vector
  1944. * @param[out] *dst pointer of the output vector
  1945. * @param[in] size number of elements in a vector
  1946. *
  1947. */
  1948. static inline void hpm_dsp_or_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
  1949. {
  1950. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1951. #ifdef __zcc__
  1952. tpt_or_16bit(dst, src1, src2, size);
  1953. #else
  1954. riscv_dsp_or_u16(src1, src2, dst, size);
  1955. #endif
  1956. #endif
  1957. }
  1958. /**
  1959. * @brief Compute the logical bitwise OR of two u8 vectors.
  1960. * @param[in] *src1 pointer of the first input vector
  1961. * @param[in] *src2 pointer of the second input vector
  1962. * @param[out] *dst pointer of the output vector
  1963. * @param[in] size number of elements in a vector
  1964. *
  1965. */
  1966. static inline void hpm_dsp_or_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
  1967. {
  1968. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  1969. #ifdef __zcc__
  1970. tpt_or_8bit(dst, src1, src2, size);
  1971. #else
  1972. riscv_dsp_or_u8(src1, src2, dst, size);
  1973. #endif
  1974. #endif
  1975. }
  1976. /** @} basic_or */
  1977. // XOR
  /**
   * @defgroup basic_xor Bitwise Exclusive OR Functions
   * @brief Bitwise Exclusive OR Functions
   *
   * Bitwise exclusive OR (XOR) functions calculate the logical bitwise XOR of two separate source vectors and write the results one-by-one into a destination vector.
   *
   * Andes DSP library supports distinct bitwise XOR functions for U32, U16 and U8 data types. These functions are introduced in the subsections below.
   */
  1986. /**
  1987. * @addtogroup basic_xor
  1988. * @{
  1989. */
  1990. /**
  1991. * @brief Compute the logical bitwise XOR of two u32 vectors.
  1992. * @param[in] *src1 pointer of the first input vector
  1993. * @param[in] *src2 pointer of the second input vector
  1994. * @param[out] *dst pointer of the output vector
  1995. * @param[in] size number of elements in a vector
  1996. *
  1997. */
  1998. static inline void hpm_dsp_xor_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
  1999. {
  2000. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2001. #ifdef __zcc__
  2002. tpt_xor_32bit(dst, src1, src2, size);
  2003. #else
  2004. riscv_dsp_xor_u32(src1, src2, dst, size);
  2005. #endif
  2006. #endif
  2007. }
  2008. /**
  2009. * @brief Compute the logical bitwise XOR of two u16 vectors.
  2010. * @param[in] *src1 pointer of the first input vector
  2011. * @param[in] *src2 pointer of the second input vector
  2012. * @param[out] *dst pointer of the output vector
  2013. * @param[in] size number of elements in a vector
  2014. *
  2015. */
  2016. static inline void hpm_dsp_xor_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
  2017. {
  2018. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2019. #ifdef __zcc__
  2020. tpt_xor_16bit(dst, src1, src2, size);
  2021. #else
  2022. riscv_dsp_xor_u16(src1, src2, dst, size);
  2023. #endif
  2024. #endif
  2025. }
  2026. /**
  2027. * @brief Compute the logical bitwise XOR of two u8 vectors.
  2028. * @param[in] *src1 pointer of the first input vector
  2029. * @param[in] *src2 pointer of the second input vector
  2030. * @param[out] *dst pointer of the output vector
  2031. * @param[in] size number of elements in a vector
  2032. *
  2033. */
  2034. static inline void hpm_dsp_xor_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
  2035. {
  2036. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2037. #ifdef __zcc__
  2038. tpt_xor_8bit(dst, src1, src2, size);
  2039. #else
  2040. riscv_dsp_xor_u8(src1, src2, dst, size);
  2041. #endif
  2042. #endif
  2043. }
  2044. /** @} basic_xor */
  2045. // Not
  /**
   * @defgroup basic_not Bitwise NOT Functions
   * @brief Bitwise NOT Functions
   *
   * Bitwise NOT functions calculate the logical bitwise NOT of each element of a source vector and write the results one-by-one into a destination vector.
   *
   * Andes DSP library supports distinct bitwise NOT functions for U32, U16 and U8 data types. These functions are introduced in the subsections below.
   */
  2054. /**
  2055. * @addtogroup basic_not
  2056. * @{
  2057. */
  2058. /**
  2059. * @brief Compute the logical bitwise NOT of u32 vector.
  2060. * @param[in] *src pointer of the input vector
  2061. * @param[out] *dst pointer of the output vector
  2062. * @param[in] size number of elements in a vector
  2063. *
  2064. */
  2065. static inline void hpm_dsp_not_u32(u32_t *src, u32_t *dst, uint32_t size)
  2066. {
  2067. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2068. #ifdef __zcc__
  2069. tpt_not_32bit(dst, src, size);
  2070. #else
  2071. riscv_dsp_not_u32(src, dst, size);
  2072. #endif
  2073. #endif
  2074. }
  2075. /**
  2076. * @brief Compute the logical bitwise NOT of u16 vector.
  2077. * @param[in] *src pointer of the input vector
  2078. * @param[out] *dst pointer of the output vector
  2079. * @param[in] size number of elements in a vector
  2080. *
  2081. */
  2082. static inline void hpm_dsp_not_u16(u16_t *src, u16_t *dst, uint32_t size)
  2083. {
  2084. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2085. #ifdef __zcc__
  2086. tpt_not_16bit(dst, src, size);
  2087. #else
  2088. riscv_dsp_not_u16(src, dst, size);
  2089. #endif
  2090. #endif
  2091. }
  2092. /**
  2093. * @brief Compute the logical bitwise NOT of u8 vector.
  2094. * @param[in] *src pointer of the input vector
  2095. * @param[out] *dst pointer of the output vector
  2096. * @param[in] size number of elements in a vector
  2097. *
  2098. */
  2099. static inline void hpm_dsp_not_u8(u8_t *src, u8_t *dst, uint32_t size)
  2100. {
  2101. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2102. #ifdef __zcc__
  2103. tpt_not_8bit(dst, src, size);
  2104. #else
  2105. riscv_dsp_not_u8(src, dst, size);
  2106. #endif
  2107. #endif
  2108. }
  2109. /** @} basic_not */
  2110. /** @} basic */
  2111. #endif
  2112. #include <stdint.h>
  /**
   * @brief Reverse 8-bit data, lsb to msb.
   *
   * Declaration only; the implementation is not part of this header section.
   *
   * @param[in] lsb lsb data
   * @return uint8_t msb
   */
  uint8_t hpm_math_sw_reverse_bit8_lsb_to_msb(uint8_t lsb);
  /**
   * @brief Reverse 8-bit data, msb to lsb.
   *
   * Declaration only; the implementation is not part of this header section.
   *
   * @param[in] msb msb data
   * @return uint8_t lsb
   */
  uint8_t hpm_math_sw_reverse_bit8_msb_to_lsb(uint8_t msb);
  /**
   * @brief Reverse 32-bit data, lsb to msb.
   *
   * Declaration only; the implementation is not part of this header section.
   *
   * @param[in] lsb lsb data
   * @return uint32_t msb
   */
  uint32_t hpm_math_sw_reverse_bit32_lsb_to_msb(uint32_t lsb);
  /**
   * @brief Reverse 32-bit data, msb to lsb.
   *
   * Declaration only; the implementation is not part of this header section.
   *
   * @param[in] msb msb data
   * @return uint32_t lsb
   */
  uint32_t hpm_math_sw_reverse_bit32_msb_to_lsb(uint32_t msb);
  2141. #endif
  2142. #ifdef HPM_MATH_DSP_COMPLEX
  /**
   * @defgroup complex DSP Complex Functions
   * This set of functions operates on complex data vectors.
   * The data in the input <code>src</code> vector and output <code>dst</code>
   * are arranged in the array as: [real, imag, real, imag, real, imag, ...].
   * @ingroup hpmmath
   * @{
   */
  2151. #ifdef HPM_EN_MATH_DSP_LIB
  2152. #ifdef __zcc__
  2153. #include "tpt_math.h"
  2154. #endif
  2155. #include "riscv_dsp_complex_math.h"
  2156. // Complex Conjugate
  2157. /**
  2158. * @brief Conjugate the floating-potint complex vector.
  2159. * @param[in] *src the input complex vector.
  2160. * @param[out] *dst the output complex vector.
  2161. * @param[in] size size of the vectors.
  2162. */
  2163. static inline void hpm_dsp_cconj_f32(const float32_t *src, float32_t *dst, uint32_t size)
  2164. {
  2165. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2166. #ifdef __zcc__
  2167. tpt_cmplx_conj_f32(dst, src, size);
  2168. #else
  2169. riscv_dsp_cconj_f32(src, dst, size);
  2170. #endif
  2171. #endif
  2172. }
  2173. /**
  2174. * @brief Conjugate the q15 complex vector.
  2175. * @param[in] *src the input complex vector.
  2176. * @param[out] *dst the output complex vector.
  2177. * @param[in] size size of the vectors.
  2178. *
  2179. * The Q15 value INT16_MIN (0x8000) will be saturated to the maximum
  2180. * allowable positive value INT16_MAX.
  2181. */
  2182. static inline void hpm_dsp_cconj_q15(const q15_t *src, q15_t *dst, uint32_t size)
  2183. {
  2184. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2185. #ifdef __zcc__
  2186. tpt_cmplx_conj_q15(dst, src, size);
  2187. #else
  2188. riscv_dsp_cconj_q15(src, dst, size);
  2189. #endif
  2190. #endif
  2191. }
  2192. /**
  2193. * @brief Conjugate the q31 complex vector.
  2194. * @param[in] *src the input complex vector.
  2195. * @param[out] *dst the output complex vector.
  2196. * @param[in] size size of the vectors.
  2197. *
  2198. * The Q31 value INT32_MIN (0x80000000) will be saturated to the maximum
  2199. * allowable positive value INT32_MAX.
  2200. */
  2201. static inline void hpm_dsp_cconj_q31(const q31_t *src, q31_t *dst, uint32_t size)
  2202. {
  2203. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2204. #ifdef __zcc__
  2205. tpt_cmplx_conj_q31(dst, src, size);
  2206. #else
  2207. riscv_dsp_cconj_q31(src, dst, size);
  2208. #endif
  2209. #endif
  2210. }
  2211. // Complex Dot Product
  2212. /**
  2213. * @brief Compute the dot product of the floating-potint complex vector.
  2214. * @param[in] *src1 the first input complex vector.
  2215. * @param[in] *src2 the second input complex vector.
  2216. * @param[in] size size of the vectors.
  2217. * @param[out] *dst the output vector.
  2218. */
  2219. static inline void hpm_dsp_cdprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *dst)
  2220. {
  2221. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2222. riscv_dsp_cdprod_f32(src1, src2, size, dst);
  2223. #endif
  2224. }
  2225. /**
  2226. * @brief Compute the dot product type2 of the floating-potint complex vector.
  2227. * @param[in] *src1 the first input complex vector.
  2228. * @param[in] *src2 the second input complex vector.
  2229. * @param[in] size size of the vectors.
  2230. * @param[out] *rout the real sum of the output.
  2231. * @param[out] *iout the imag sum of the output.
  2232. */
  2233. static inline void hpm_dsp_cdprod_typ2_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *rout, float32_t *iout)
  2234. {
  2235. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2236. #ifdef __zcc__
  2237. tpt_cmplx_dot_prod_f32(rout, iout, src1, src2, size);
  2238. #else
  2239. riscv_dsp_cdprod_typ2_f32(src1, src2, size, rout, iout);
  2240. #endif
  2241. #endif
  2242. }
  2243. /**
  2244. * @brief Compute the dot product of the q15 complex vector.
  2245. * @param[in] *src1 the first input complex vector.
  2246. * @param[in] *src2 the second input complex vector.
  2247. * @param[in] size size of the vectors.
  2248. * @param[out] *dst the output vector.
  2249. *
  2250. * The multiplication outputs are in 1.15 x 1.15 = 2.30 format and
  2251. * finally output is shift into 3.13 format.
  2252. */
  2253. static inline void hpm_dsp_cdprod_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q15_t *dst)
  2254. {
  2255. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2256. riscv_dsp_cdprod_q15(src1, src2, size, dst);
  2257. #endif
  2258. }
  2259. /**
  2260. * @brief Compute the dot product type2 of the q15 complex vector.
  2261. * @param[in] *src1 the first input complex vector.
  2262. * @param[in] *src2 the second input complex vector.
  2263. * @param[in] size size of the vectors.
  2264. * @param[out] *rout the real sum of the output.
  2265. * @param[out] *iout the imag sum of the output.
  2266. *
  2267. * The multiplication outputs are in 1.15 x 1.15 = 2.30 format and
  2268. * finally output is shift into q24 format.
  2269. */
  2270. static inline void hpm_dsp_cdprod_typ2_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q31_t *rout, q31_t *iout)
  2271. {
  2272. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2273. riscv_dsp_cdprod_typ2_q15(src1, src2, size, rout, iout);
  2274. #endif
  2275. }
  2276. /**
  2277. * @brief Compute the dot product of the q31 complex vector.
  2278. * @param[in] *src1 the first input complex vector.
  2279. * @param[in] *src2 the second input complex vector.
  2280. * @param[in] size size of the vectors.
  2281. * @param[out] *dst the output vector.
  2282. *
  2283. * The multiplication outputs are in 1.31 x 1.31 = 2.62 format and
  2284. * finally output is shift into 3.29 format.
  2285. */
  2286. static inline void hpm_dsp_cdprod_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q31_t *dst)
  2287. {
  2288. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2289. riscv_dsp_cdprod_q31(src1, src2, size, dst);
  2290. #endif
  2291. }
  2292. /**
  2293. * @brief Compute the dot product type2 of the q31 complex vector.
  2294. * @param[in] *src1 the first input complex vector.
  2295. * @param[in] *src2 the second input complex vector.
  2296. * @param[in] size size of the vectors.
  2297. * @param[out] *rout the real sum of the output.
  2298. * @param[out] *iout the imag sum of the output.
  2299. *
  2300. * The multiplication outputs are in 1.31 x 1.31 = 2.62 format and
  2301. * finally output is shift into q48 format.
  2302. */
  2303. static inline void hpm_dsp_cdprod_typ2_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q63_t *rout, q63_t *iout)
  2304. {
  2305. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2306. #ifdef __zcc__
  2307. tpt_cmplx_dot_prod_q31(rout, iout, src1, src2, size);
  2308. #else
  2309. riscv_dsp_cdprod_typ2_q31(src1, src2, size, rout, iout);
  2310. #endif
  2311. #endif
  2312. }
  2313. // Complex Magnitude
  2314. /**
  2315. * @brief Compute the magnitude of the floating-potint complex vector.
  2316. * @param[in] *src points to the input complex vector.
  2317. * @param[out] *dst points to the output complex vector.
  2318. * @param[in] size size of the vectors.
  2319. */
  2320. static inline void hpm_dsp_cmag_f32(const float32_t *src, float32_t *dst, uint32_t size)
  2321. {
  2322. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2323. #ifdef __zcc__
  2324. tpt_cmplx_mag_f32(dst, src, size);
  2325. #else
  2326. riscv_dsp_cmag_f32(src, dst, size);
  2327. #endif
  2328. #endif
  2329. }
  2330. /**
  2331. * @brief Compute the magnitude of the q15 complex vector.
  2332. * @param[in] *src points to the input complex vector.
  2333. * @param[out] *dst points to the output complex vector.
  2334. * @param[in] size size of the vectors.
  2335. *
  2336. * The multiplication outputs are in 1.15 x 1.15 = 2.30 format and
  2337. * finally output is shift into 2.14 format.
  2338. */
  2339. static inline void hpm_dsp_cmag_q15(const q15_t *src, q15_t *dst, uint32_t size)
  2340. {
  2341. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2342. #ifdef __zcc__
  2343. tpt_cmplx_mag_q15(dst, src, size);
  2344. #else
  2345. riscv_dsp_cmag_q15(src, dst, size);
  2346. #endif
  2347. #endif
  2348. }
  2349. /**
  2350. * @brief Compute the magnitude of the q31 complex vector.
  2351. * @param[in] *src points to the input complex vector.
  2352. * @param[out] *dst points to the output complex vector.
  2353. * @param[in] size size of the vectors.
  2354. *
  2355. * The multiplication outputs are in 1.31 x 1.31 = 2.62 format and
  2356. * finally output is shift into 2.30 format.
  2357. */
  2358. static inline void hpm_dsp_cmag_q31(const q31_t *src, q31_t *dst, uint32_t size)
  2359. {
  2360. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2361. #ifdef __zcc__
  2362. tpt_cmplx_mag_q31(dst, src, size);
  2363. #else
  2364. riscv_dsp_cmag_q31(src, dst, size);
  2365. #endif
  2366. #endif
  2367. }
  2368. // Complex Magnitude Squared
  2369. /**
  2370. * @brief Compute the magnitude squared of the floating-potint complex
  2371. * vector.
  2372. * @param[in] *src points to the input complex vector.
  2373. * @param[out] *dst points to the output complex vector.
  2374. * @param[in] size size of the vectors.
  2375. */
  2376. static inline void hpm_dsp_cmag_sqr_f32(const float32_t *src, float32_t *dst, uint32_t size)
  2377. {
  2378. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2379. #ifdef __zcc__
  2380. tpt_cmplx_mag_squared_f32(dst, src, size);
  2381. #else
  2382. riscv_dsp_cmag_sqr_f32(src, dst, size);
  2383. #endif
  2384. #endif
  2385. }
  2386. /**
  2387. * @brief Compute the magnitude squared of the q15 complex vector.
  2388. * @param[in] *src points to the input complex vector.
  2389. * @param[out] *dst points to the output complex vector.
  2390. * @param[in] size size of the vectors.
  2391. *
  2392. * The multiplication outputs are in 1.15 x 1.15 = 2.30 format and
  2393. * finally output is shift into 3.13 format.
  2394. */
  2395. static inline void hpm_dsp_cmag_sqr_q15(const q15_t *src, q15_t *dst, uint32_t size)
  2396. {
  2397. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2398. #ifdef __zcc__
  2399. tpt_cmplx_mag_squared_q15(dst, src, size);
  2400. #else
  2401. riscv_dsp_cmag_sqr_q15(src, dst, size);
  2402. #endif
  2403. #endif
  2404. }
  2405. /**
  2406. * @brief Compute the magnitude squared of the q31 complex vector.
  2407. * @param[in] *src points to the input complex vector.
  2408. * @param[out] *dst points to the output complex vector.
  2409. * @param[in] size size of the vectors.
  2410. *
  2411. * The multiplication outputs are in 1.31 x 1.31 = 2.62 format and
  2412. * finally output is shift into 3.29 format.
  2413. */
  2414. static inline void hpm_dsp_cmag_sqr_q31(const q31_t *src, q31_t *dst, uint32_t size)
  2415. {
  2416. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2417. #ifdef __zcc__
  2418. tpt_cmplx_mag_squared_q31(dst, src, size);
  2419. #else
  2420. riscv_dsp_cmag_sqr_q31(src, dst, size);
  2421. #endif
  2422. #endif
  2423. }
  2424. // Complex Multiplication
  2425. /**
  2426. * @brief Multiply two folating-point complex vector.
  2427. * @param[in] *src1 the first input complex vector.
  2428. * @param[in] *src2 the second input complex vector.
  2429. * @param[out] *dst output complex vector.
  2430. * @param[in] size size of the vectors.
  2431. */
  2432. static inline void hpm_dsp_cmul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t size)
  2433. {
  2434. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2435. #ifdef __zcc__
  2436. tpt_cmplx_mult_cmplx_f32(dst, src1, src2, size);
  2437. #else
  2438. riscv_dsp_cmul_f32(src1, src2, dst, size);
  2439. #endif
  2440. #endif
  2441. }
  2442. /**
  2443. * @brief Multiply two q15 complex vector.
  2444. * @param[in] *src1 the first input complex vector.
  2445. * @param[in] *src2 the second input complex vector.
  2446. * @param[out] *dst output complex vector.
  2447. * @param[in] size size of the vectors.
  2448. *
  2449. * The multiplication outputs are in 1.15 x 1.15 = 2.30 format and
  2450. * finally output is shift into 3.13 format.
  2451. */
  2452. static inline void hpm_dsp_cmul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t size)
  2453. {
  2454. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2455. #ifdef __zcc__
  2456. tpt_cmplx_mult_cmplx_q15(dst, src1, src2, size);
  2457. #else
  2458. riscv_dsp_cmul_q15(src1, src2, dst, size);
  2459. #endif
  2460. #endif
  2461. }
  2462. /**
  2463. * @brief Multiply two q31 complex vector.
  2464. * @param[in] *src1 the first input complex vector.
  2465. * @param[in] *src2 the second input complex vector.
  2466. * @param[out] *dst output complex vector.
  2467. * @param[in] size size of the vectors.
  2468. *
  2469. * The multiplication outputs are in 1.31 x 1.31 = 2.62 format and
  2470. * finally output is shift into 3.29 format.
  2471. */
  2472. static inline void hpm_dsp_cmul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size)
  2473. {
  2474. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2475. #ifdef __zcc__
  2476. tpt_cmplx_mult_cmplx_q31(dst, src1, src2, size);
  2477. #else
  2478. riscv_dsp_cmul_q31(src1, src2, dst, size);
  2479. #endif
  2480. #endif
  2481. }
  2482. // Complex-by-Real Multiplication
  2483. /**
  2484. * @brief Multiply the folating-point complex vector by a real vector.
  2485. * @param[in] *src the input complex vector.
  2486. * @param[in] *real the input real vector.
  2487. * @param[out] *dst output complex vector.
  2488. * @param[in] size size of the vectors.
  2489. */
  2490. static inline void hpm_dsp_cmul_real_f32(const float32_t *src, const float32_t *real, float32_t *dst, uint32_t size)
  2491. {
  2492. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2493. #ifdef __zcc__
  2494. tpt_cmplx_mult_real_f32(dst, src, real, size);
  2495. #else
  2496. riscv_dsp_cmul_real_f32(src, real, dst, size);
  2497. #endif
  2498. #endif
  2499. }
  2500. /**
  2501. * @brief Multiply the q15 complex vector by a real vector.
  2502. * @param[in] *src the input complex vector.
  2503. * @param[in] *real the input real vector.
  2504. * @param[out] *dst output complex vector.
  2505. * @param[in] size size of the vectors.
  2506. *
  2507. * Output results will be saturated in Q15 range [0x8000 0x7FFF].
  2508. */
  2509. static inline void hpm_dsp_cmul_real_q15(const q15_t *src, const q15_t *real, q15_t *dst, uint32_t size)
  2510. {
  2511. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2512. #ifdef __zcc__
  2513. tpt_cmplx_mult_real_q15(dst, src, real, size);
  2514. #else
  2515. riscv_dsp_cmul_real_q15(src, real, dst, size);
  2516. #endif
  2517. #endif
  2518. }
  2519. /**
  2520. * @brief Multiply the q31 complex vector by a real vector.
  2521. * @param[in] *src the input complex vector.
  2522. * @param[in] *real the input real vector.
  2523. * @param[out] *dst output complex vector.
  2524. * @param[in] size size of the vectors.
  2525. *
  2526. * Output results will be saturated in Q31 range[0x80000000 0x7FFFFFFF].
  2527. */
  2528. static inline void hpm_dsp_cmul_real_q31(const q31_t *src, const q31_t *real, q31_t *dst, uint32_t size)
  2529. {
  2530. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2531. #ifdef __zcc__
  2532. tpt_cmplx_mult_real_q31(dst, src, real, size);
  2533. #else
  2534. riscv_dsp_cmul_real_q31(src, real, dst, size);
  2535. #endif
  2536. #endif
  2537. }
  2538. #endif
  2539. #endif
  2540. /**
  2541. * @}
  2542. *
  2543. */
  2544. #ifdef HPM_MATH_DSP_CONTROLLER
  2545. /**
  2546. * @defgroup controller DSP Controller Functions
  2547. * @ingroup hpmmath
  2548. * @{
  2549. */
  2550. #ifdef HPM_EN_MATH_DSP_LIB
  2551. #include "riscv_dsp_controller_math.h"
  2552. // Clarke Transform
  2553. /**
  2554. * @brief Clarke transform of floating-point input.
  2555. * @param[in] a input three-phase coordinate a.
  2556. * @param[in] b input three-phase coordinate b.
  2557. * @param[out] *alpha output two-phase orthogonal vector axis alpha.
  2558. * @param[out] *beta output two-phase orthogonal vector axis beta.
  2559. */
  2560. static inline void hpm_dsp_clarke_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta)
  2561. {
  2562. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2563. riscv_dsp_clarke_f32(a, b, alpha, beta);
  2564. #endif
  2565. }
  2566. /**
  2567. * @brief Clarke transform of q31 input.
  2568. * @param[in] a input three-phase coordinate a.
  2569. * @param[in] b input three-phase coordinate b.
  2570. * @param[out] *alpha output two-phase orthogonal vector axis alpha.
  2571. * @param[out] *beta output two-phase orthogonal vector axis beta.
  2572. *
  2573. * The internal 32-bit accumulator maintains 1.31 format by truncating lower
  2574. * 31 bits of the intermediate multiplication in 2.62 format.
  2575. */
  2576. static inline void hpm_dsp_clarke_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta)
  2577. {
  2578. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2579. riscv_dsp_clarke_q31(a, b, alpha, beta);
  2580. #endif
  2581. }
  2582. // Inverse Clarke Transform
  2583. /**
  2584. * @brief Inverse Clarke transform of floating-point input.
  2585. * @param[in] alpha input two-phase orthogonal vector axis alpha.
  2586. * @param[in] beta input two-phase orthogonal vector axis beta.
  2587. * @param[out] *a output three-phase coordinate a.
  2588. * @param[in] *b output three-phase coordinate b.
  2589. */
  2590. static inline void hpm_dsp_inv_clarke_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b)
  2591. {
  2592. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2593. riscv_dsp_inv_clarke_f32(alpha, beta, a, b);
  2594. #endif
  2595. }
  2596. /**
  2597. * @brief Inverse Clarke transform of q31 input.
  2598. * @param[in] alpha input two-phase orthogonal vector axis alpha.
  2599. * @param[in] beta input two-phase orthogonal vector axis beta.
  2600. * @param[out] *a output three-phase coordinate a.
  2601. * @param[in] *b output three-phase coordinate b.
  2602. *
  2603. * The internal 32-bit accumulator maintains 1.31 format by truncating lower
  2604. * 31 bits of the intermediate multiplication in 2.62 format.
  2605. */
  2606. static inline void hpm_dsp_inv_clarke_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b)
  2607. {
  2608. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2609. riscv_dsp_inv_clarke_q31(alpha, beta, a, b);
  2610. #endif
  2611. }
  2612. // Park Transform
  2613. /**
  2614. * @brief Park transform of floating-point input.
  2615. * @param[in] alpha input two-phase coordinate alpha.
  2616. * @param[in] beta input two-phase coordinate beta.
  2617. * @param[out] *a output rotor frame a.
  2618. * @param[out] *b output rotor frame b.
  2619. * @param[in] sin sine value of rotation angle .
  2620. * @param[in] cos cosine value of rotation angle .
  2621. */
  2622. static inline void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
  2623. {
  2624. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2625. riscv_dsp_park_f32(alpha, beta, a, b, sin, cos);
  2626. #endif
  2627. }
  2628. /**
  2629. * @brief Park transform of q31 input.
  2630. * @param[in] alpha input two-phase coordinate alpha.
  2631. * @param[in] beta input two-phase coordinate beta.
  2632. * @param[out] *a output rotor frame a.
  2633. * @param[out] *b output rotor frame b.
  2634. * @param[in] sin sine value of rotation angle .
  2635. * @param[in] cos cosine value of rotation angle .
  2636. *
  2637. * The internal 32-bit accumulator maintains 1.31 format by truncating lower
  2638. * 31 bits of the intermediate multiplication in 2.62 format.
  2639. */
  2640. static inline void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
  2641. {
  2642. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2643. riscv_dsp_park_q31(alpha, beta, a, b, sin, cos);
  2644. #endif
  2645. }
  2646. // Inverse Park Transform
  2647. /**
  2648. * @brief Inverse Park transform of floating-point input.
  2649. * @param[in] a input coordinate of rotor frame a.
  2650. * @param[in] b input coordinate of rotor frame b.
  2651. * @param[out] *alpha output two-phase orthogonal vec axis alpha.
  2652. * @param[out] *beta output two-phase orthogonal vec axis beta.
  2653. * @param[in] sin sine value of rotation angle .
  2654. * @param[in] cos cosine value of rotation angle .
  2655. */
  2656. static inline void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
  2657. {
  2658. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2659. riscv_dsp_inv_park_f32(a, b, alpha, beta, sin, cos);
  2660. #endif
  2661. }
  2662. /**
  2663. * @brief Inverse Park transform of q31 input.
  2664. * @param[in] a input coordinate of rotor frame a.
  2665. * @param[in] b input coordinate of rotor frame b.
  2666. * @param[out] *alpha output two-phase orthogonal vec axis alpha.
  2667. * @param[out] *beta output two-phase orthogonal vec axis beta.
  2668. * @param[in] sin sine value of rotation angle .
  2669. * @param[in] cos cosine value of rotation angle .
  2670. *
  2671. * The internal 32-bit accumulator maintains 1.31 format by truncating lower
  2672. * 31 bits of the intermediate multiplication in 2.62 format.
  2673. */
  2674. static inline void hpm_dsp_inv_park_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta, q31_t sin, q31_t cos)
  2675. {
  2676. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2677. riscv_dsp_inv_park_q31(a, b, alpha, beta, sin, cos);
  2678. #endif
  2679. }
  2680. /**
  2681. * @brief PID control of floating-point input.
  2682. * @param[in, out] *instance points to an instance of the PID
  2683. * controliler.
  2684. * @param[in] src input data.
  2685. * @return output data.
  2686. */
  2687. static inline float32_t hpm_dsp_pid_f32(riscv_dsp_pid_f32_t *instance, float32_t src)
  2688. {
  2689. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2690. return riscv_dsp_pid_f32(instance, src);
  2691. #endif
  2692. }
  2693. /**
  2694. * @brief PID initializatopn control function of floating-point formats.
  2695. * @param[in, out] *instance points to an instance of the PID
  2696. * controliler.
  2697. * @param[in] set for 1 will clear the state to all zeros
  2698. * 0 will not.
  2699. *
  2700. * This function will calculate the PID control structure gain
  2701. * <code>gain1</code>, <code>gain2</code> and <code>gain3</code> by seting
  2702. * the variable <code>Kp</code>, <code>Ki</code> and <code>Kd</code>. The
  2703. * state variable will set to all zeros.
  2704. */
  2705. static inline void hpm_dsp_init_pid_f32(riscv_dsp_pid_f32_t *instance, int32_t set)
  2706. {
  2707. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2708. riscv_dsp_init_pid_f32(instance, set);
  2709. #endif
  2710. }
  2711. /**
  2712. * @brief PID control of Q31 input.
  2713. * @param[in, out] *instance points to an instance of the PID
  2714. * controliler.
  2715. * @param[in] src input data.
  2716. * @return output data.
  2717. */
  2718. static inline q31_t hpm_dsp_pid_q31(riscv_dsp_pid_q31_t *instance, q31_t src)
  2719. {
  2720. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2721. return riscv_dsp_pid_q31(instance, src);
  2722. #endif
  2723. }
  2724. /**
  2725. * @brief PID initializatopn control function of Q31 formats.
  2726. * @param[in, out] *instance points to an instance of the PID
  2727. * controliler.
  2728. * @param[in] set for 1 will clear the state to all zeros
  2729. * 0 will not.
  2730. *
  2731. * This function will calculate the PID control structure gain
  2732. * <code>gain1</code>, <code>gain2</code> and <code>gain3</code> by seting
  2733. * the variable <code>Kp</code>, <code>Ki</code> and <code>Kd</code>. The
  2734. * state variable will set to all zeros.
  2735. */
  2736. static inline void hpm_dsp_init_pid_q31(riscv_dsp_pid_q31_t *instance, int32_t set)
  2737. {
  2738. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2739. riscv_dsp_init_pid_q31(instance, set);
  2740. #endif
  2741. }
  2742. static inline q15_t hpm_dsp_pid_q15(riscv_dsp_pid_q15_t *instance, q15_t src)
  2743. {
  2744. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2745. return riscv_dsp_pid_q15(instance, src);
  2746. #endif
  2747. }
  2748. /**
  2749. * @brief PID initializatopn control function of Q15 formats.
  2750. * @param[in, out] *instance points to an instance of the PID
  2751. * controliler.
  2752. * @param[in] set for 1 will clear the state to all zeros
  2753. * 0 will not.
  2754. *
  2755. * This function will calculate the PID control structure gain
  2756. * <code>gain1</code>, <code>gain2</code> and <code>gain3</code> by seting
  2757. * the variable <code>Kp</code>, <code>Ki</code> and <code>Kd</code>. The
  2758. * state variable will set to all zeros.
  2759. */
  2760. static inline void hpm_dsp_init_pid_q15(riscv_dsp_pid_q15_t *instance, int32_t set)
  2761. {
  2762. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2763. riscv_dsp_init_pid_q15(instance, set);
  2764. #endif
  2765. }
  2766. #endif
  2767. #endif
  2768. /**
  2769. * @}
  2770. *
  2771. */
  2772. #ifdef HPM_MATH_DSP_DISTANCE
  2773. /**
  2774. * @defgroup dist DSP Distance Functions
  2775. * @ingroup hpmmath
  2776. * @{
  2777. */
  2778. #ifdef HPM_EN_MATH_DSP_LIB
  2779. #ifdef __zcc__
  2780. #include "tpt_math.h"
  2781. #endif
  2782. #include "riscv_dsp_distance_math.h"
  2783. /**
  2784. * @brief Bray-Curtis distance between two vectors
  2785. * @param[in] src1 First vector
  2786. * @param[in] src2 Second vector
  2787. * @param[in] size vector length
  2788. * @return distance
  2789. */
  2790. static inline float32_t hpm_dsp_dist_bray_curtis_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  2791. {
  2792. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2793. #ifdef __zcc__
  2794. return tpt_braycurtis_distance_f32(src1, src2, size);
  2795. #else
  2796. return riscv_dsp_dist_bray_curtis_f32(src1, src2, size);
  2797. #endif
  2798. #endif
  2799. }
  2800. /**
  2801. * @brief Canberra distance between two vectors
  2802. * @param[in] src1 First vector
  2803. * @param[in] src2 Second vector
  2804. * @param[in] size vector length
  2805. * @return distance
  2806. */
  2807. static inline float32_t hpm_dsp_dist_canberra_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  2808. {
  2809. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2810. #ifdef __zcc__
  2811. return tpt_canberra_distance_f32(src1, src2, size);
  2812. #else
  2813. return riscv_dsp_dist_canberra_f32(src1, src2, size);
  2814. #endif
  2815. #endif
  2816. }
  2817. /**
  2818. * @brief Chebyshev distance between two vectors
  2819. * @param[in] src1 First vector
  2820. * @param[in] src2 Second vector
  2821. * @param[in] size vector length
  2822. * @return distance
  2823. */
  2824. static inline float32_t hpm_dsp_dist_chebyshev_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  2825. {
  2826. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2827. #ifdef __zcc__
  2828. return tpt_chebyshev_distance_f32(src1, src2, size);
  2829. #else
  2830. return riscv_dsp_dist_chebyshev_f32(src1, src2, size);
  2831. #endif
  2832. #endif
  2833. }
  2834. /**
  2835. * @brief Cityblock (Manhattan) distance between two vectors
  2836. * @param[in] src1 First vector
  2837. * @param[in] src2 Second vector
  2838. * @param[in] size vector length
  2839. * @return distance
  2840. */
  2841. static inline float32_t hpm_dsp_dist_city_block_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  2842. {
  2843. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2844. #ifdef __zcc__
  2845. return tpt_cityblock_distance_f32(src1, src2, size);
  2846. #else
  2847. return riscv_dsp_dist_city_block_f32(src1, src2, size);
  2848. #endif
  2849. #endif
  2850. }
  2851. /**
  2852. * @brief Correlation distance between two vectors
  2853. * @param[in] src1 First vector
  2854. * @param[in] src2 Second vector
  2855. * @param[in] size vector length
  2856. * @return distance
  2857. */
  2858. static inline float32_t hpm_dsp_dist_corr_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  2859. {
  2860. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2861. #ifdef __zcc__
  2862. return tpt_correlation_distance_f32(src1, src2, size);
  2863. #else
  2864. return riscv_dsp_dist_corr_f32(src1, src2, size);
  2865. #endif
  2866. #endif
  2867. }
  2868. /**
  2869. * @brief Cosine distance between two vectors
  2870. * @param[in] src1 First vector
  2871. * @param[in] src2 Second vector
  2872. * @param[in] size vector length
  2873. * @return distance
  2874. */
  2875. static inline float32_t hpm_dsp_dist_cos_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  2876. {
  2877. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2878. #ifdef __zcc__
  2879. return tpt_cosine_distance_f32(src1, src2, size);
  2880. #else
  2881. return riscv_dsp_dist_cos_f32(src1, src2, size);
  2882. #endif
  2883. #endif
  2884. }
  2885. /**
  2886. * @brief Euclidean distance between two vectors
  2887. * @param[in] src1 First vector
  2888. * @param[in] src2 Second vector
  2889. * @param[in] size vector length
  2890. * @return distance
  2891. */
  2892. static inline float32_t hpm_dsp_dist_euclidean_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  2893. {
  2894. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2895. #ifdef __zcc__
  2896. return tpt_euclidean_distance_f32(src1, src2, size);
  2897. #else
  2898. return riscv_dsp_dist_euclidean_f32(src1, src2, size);
  2899. #endif
  2900. #endif
  2901. }
  2902. /**
  2903. * @brief Jensen-Shannon distance between two vectors
  2904. * @param[in] src1 First vector
  2905. * @param[in] src2 Second vector
  2906. * @param[in] size vector length
  2907. * @return distance
  2908. */
  2909. static inline float32_t hpm_dsp_dist_jensen_shannon_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
  2910. {
  2911. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2912. #ifdef __zcc__
  2913. return tpt_jensenshannon_distance_f32(src1, src2, size);
  2914. #else
  2915. return riscv_dsp_dist_jensen_shannon_f32(src1, src2, size);
  2916. #endif
  2917. #endif
  2918. }
  2919. /**
  2920. * @brief Minkowski distance between two vectors
  2921. * @param[in] src1 First vector
  2922. * @param[in] src2 Second vector
  2923. * @param[in] order Distance order
  2924. * @param[in] size vector length
  2925. * @return distance
  2926. */
  2927. static inline float32_t hpm_dsp_dist_minkowski_f32(const float32_t *src1, const float32_t *src2, int32_t order, uint32_t size)
  2928. {
  2929. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2930. #ifdef __zcc__
  2931. return tpt_minkowski_distance_f32(src1, src2, order, size);
  2932. #else
  2933. return riscv_dsp_dist_minkowski_f32(src1, src2, order, size);
  2934. #endif
  2935. #endif
  2936. }
  2937. /**
  2938. * @brief Dice distance between two vectors
  2939. * @param[in] src1 First vector
  2940. * @param[in] src2 Second vector
  2941. * @param[in] numofbool Number of booleans
  2942. * @return distance
  2943. */
  2944. static inline float32_t hpm_dsp_bdist_dice_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  2945. {
  2946. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2947. #ifdef __zcc__
  2948. return tpt_dice_distance(src1, src2, numofbool);
  2949. #else
  2950. return riscv_dsp_bdist_dice_u32_f32(src1, src2, numofbool);
  2951. #endif
  2952. #endif
  2953. }
  2954. /**
  2955. * @brief Hamming distance between two vectors
  2956. * @param[in] src1 First vector
  2957. * @param[in] src2 Second vector
  2958. * @param[in] numofbool Number of booleans
  2959. * @return distance
  2960. */
  2961. static inline float32_t hpm_dsp_bdist_hamming_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  2962. {
  2963. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2964. #ifdef __zcc__
  2965. return tpt_hamming_distance(src1, src2, numofbool);
  2966. #else
  2967. return riscv_dsp_bdist_hamming_u32_f32(src1, src2, numofbool);
  2968. #endif
  2969. #endif
  2970. }
  2971. /**
  2972. * @brief Jaccard distance between two vectors
  2973. * @param[in] src1 First vector
  2974. * @param[in] src2 Second vector
  2975. * @param[in] numofbool Number of booleans
  2976. * @return distance
  2977. */
  2978. static inline float32_t hpm_dsp_bdist_jaccard_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  2979. {
  2980. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2981. #ifdef __zcc__
  2982. return tpt_jaccard_distance(src1, src2, numofbool);
  2983. #else
  2984. return riscv_dsp_bdist_jaccard_u32_f32(src1, src2, numofbool);
  2985. #endif
  2986. #endif
  2987. }
  2988. /**
  2989. * @brief Kulsinski distance between two vectors
  2990. * @param[in] src1 First vector
  2991. * @param[in] src2 Second vector
  2992. * @param[in] numofbool Number of booleans
  2993. * @return distance
  2994. */
  2995. static inline float32_t hpm_dsp_bdist_kulsinski_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  2996. {
  2997. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  2998. #ifdef __zcc__
  2999. return tpt_kulsinski_distance(src1, src2, numofbool);
  3000. #else
  3001. return riscv_dsp_bdist_kulsinski_u32_f32(src1, src2, numofbool);
  3002. #endif
  3003. #endif
  3004. }
  3005. /**
  3006. * @brief Sokal-Michener distance between two vectors
  3007. * @param[in] src1 First vector
  3008. * @param[in] src2 Second vector
  3009. * @param[in] numofbool Number of booleans
  3010. * @return distance
  3011. */
  3012. static inline float32_t hpm_dsp_bdist_sokal_michener_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  3013. {
  3014. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3015. #ifdef __zcc__
  3016. return tpt_sokalmichener_distance(src1, src2, numofbool);
  3017. #else
  3018. return riscv_dsp_bdist_sokal_michener_u32_f32(src1, src2, numofbool);
  3019. #endif
  3020. #endif
  3021. }
  3022. /**
  3023. * @brief Sokal-Sneath distance between two vectors
  3024. * @param[in] src1 First vector
  3025. * @param[in] src2 Second vector
  3026. * @param[in] numofbool Number of booleans
  3027. * @return distance
  3028. */
  3029. static inline float32_t hpm_dsp_bdist_sokal_sneath_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  3030. {
  3031. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3032. #ifdef __zcc__
  3033. return tpt_sokalsneath_distance(src1, src2, numofbool);
  3034. #else
  3035. return riscv_dsp_bdist_sokal_sneath_u32_f32(src1, src2, numofbool);
  3036. #endif
  3037. #endif
  3038. }
  3039. /**
  3040. * @brief Roger Stanimoto distance between two vectors
  3041. * @param[in] src1 First vector
  3042. * @param[in] src2 Second vector
  3043. * @param[in] numofbool Number of booleans
  3044. * @return distance
  3045. */
  3046. static inline float32_t hpm_dsp_bdist_rogers_tanimoto_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  3047. {
  3048. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3049. #ifdef __zcc__
  3050. return tpt_rogerstanimoto_distance(src1, src2, numofbool);
  3051. #else
  3052. return riscv_dsp_bdist_rogers_tanimoto_u32_f32(src1, src2, numofbool);
  3053. #endif
  3054. #endif
  3055. }
  3056. /**
  3057. * @brief Yule distance between two vectors
  3058. * @param[in] src1 First vector
  3059. * @param[in] src2 Second vector
  3060. * @param[in] numofbool Number of booleans
  3061. * @return distance
  3062. */
  3063. static inline float32_t hpm_dsp_bdist_yule_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  3064. {
  3065. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3066. #ifdef __zcc__
  3067. return tpt_yule_distance(src1, src2, numofbool);
  3068. #else
  3069. return riscv_dsp_bdist_yule_u32_f32(src1, src2, numofbool);
  3070. #endif
  3071. #endif
  3072. }
  3073. /**
  3074. * @brief Russell-Rao distance between two vectors
  3075. * @param[in] src1 First vector
  3076. * @param[in] src2 Second vector
  3077. * @param[in] numofbool Number of booleans
  3078. * @return distance
  3079. */
  3080. static inline float32_t hpm_dsp_bdist_russell_rao_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
  3081. {
  3082. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3083. #ifdef __zcc__
  3084. return tpt_russellrao_distance(src1, src2, numofbool);
  3085. #else
  3086. return riscv_dsp_bdist_russell_rao_u32_f32(src1, src2, numofbool);
  3087. #endif
  3088. #endif
  3089. }
  3090. #endif
  3091. #endif
  3092. /**
  3093. * @}
  3094. *
  3095. */
  3096. #ifdef HPM_MATH_DSP_FILTERING
  3097. /**
  3098. * @defgroup filtering DSP Filtering Functions
  3099. * @ingroup hpmmath
  3100. * @{
  3101. */
  3102. #ifdef HPM_EN_MATH_DSP_LIB
  3103. #ifdef __zcc__
  3104. #include "tpt_math.h"
  3105. #endif
  3106. #include "riscv_dsp_filtering_math.h"
  3107. /**
  3108. * @brief Function for the floating-point FIR filter.
  3109. * @param[in] *instance points to an instance of the FIR structure.
  3110. * @param[in] *src points to the input block data.
  3111. * @param[out] *dst points to the output block data.
  3112. * @param[in] size number of the blocksize.
  3113. */
  3114. static inline void hpm_dsp_fir_f32(const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
  3115. {
  3116. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3117. riscv_dsp_fir_f32(instance, src, dst, size);
  3118. #endif
  3119. }
  3120. /**
  3121. * @brief Function for the q31 FIR filter.
  3122. * @param[in] *instance points to an instance of the FIR structure.
  3123. * @param[in] *src points to the input block data.
  3124. * @param[out] *dst points to the output block data.
  3125. * @param[in] size number of the blocksize.
  3126. *
  3127. * Function notes:
  3128. * Both coefficients and state variables are represented in 1.31 format
  3129. * and multiplications yield a 2.62 result. The 2.62 results are accumulated
  3130. * in a 64-bit accumulator and is right shifted by 31 bits and saturated to
  3131. * 1.31 formatthe to yield the final result. In order to avoid overflows
  3132. * completely the input signal must be scaled down by log2(coeff_size) bits.
  3133. */
  3134. static inline void hpm_dsp_fir_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3135. {
  3136. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3137. riscv_dsp_fir_q31(instance, src, dst, size);
  3138. #endif
  3139. }
  3140. /**
  3141. * @brief Function for the q31 FIR filter.
  3142. * @param[in] *instance points to an instance of the FIR structure.
  3143. * @param[in] *src points to the input block data.
  3144. * @param[out] *dst points to the output block data.
  3145. * @param[in] size number of the blocksize.
  3146. *
  3147. * Function notes:
  3148. * Both coefficients and state variables are represented in 1.31 format.
  3149. * These intermediate multiplications results are added to a 2.30 accumulator.
  3150. * Finally, the accumulator is saturated and
  3151. * converted to a 1.31 result. In order to avoid overflows
  3152. * completely the input signal must be scaled down by log2(coeff_size) bits.
  3153. */
  3154. static inline void hpm_dsp_fir_fast_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3155. {
  3156. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3157. riscv_dsp_fir_fast_q31(instance, src, dst, size);
  3158. #endif
  3159. }
  3160. /**
  3161. * @brief Function for the q15 FIR filter.
  3162. * @param[in] *instance points to an instance of the FIR structure.
  3163. * @param[in] *src points to the input block data.
  3164. * @param[out] *dst points to the output block data.
  3165. * @param[in] size number of the blocksize.
  3166. *
  3167. * Function notes:
  3168. * Both coefficients and state variables are represented in 1.15 format
  3169. * and multiplications yield a 2.30 result. The 2.30 results are accumulated
  3170. * in a 64-bit accumulator in 34.30 format and the results is truncated
  3171. * to 34.15 format by discarding low 15 bits. Lastly, the outputs is
  3172. * saturated to yield a result in 1.15 format.
  3173. */
  3174. static inline void hpm_dsp_fir_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3175. {
  3176. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3177. riscv_dsp_fir_q15(instance, src, dst, size);
  3178. #endif
  3179. }
  3180. /**
  3181. * @brief Function for the q15 FIR filter.
  3182. * @param[in] *instance points to an instance of the FIR structure.
  3183. * @param[in] *src points to the input block data.
  3184. * @param[out] *dst points to the output block data.
  3185. * @param[in] size number of the blocksize.
  3186. *
  3187. * Function notes:
  3188. * Both coefficients and state variables are represented in Q15 format and multiplications yield
  3189. * a Q30 result. The results are accumulated in a 32-bit accumulator in Q2.30 format. Lastly, the
  3190. * outputs are saturated to yield a result in Q1.15 format.
  3191. */
  3192. static inline void hpm_dsp_fir_fast_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3193. {
  3194. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3195. riscv_dsp_fir_fast_q15(instance, src, dst, size);
  3196. #endif
  3197. }
  3198. /**
  3199. * @brief Function for the q7 FIR filter.
  3200. * @param[in] *instance points to an instance of the FIR structure.
  3201. * @param[in] *src points to the input block data.
  3202. * @param[out] *dst points to the output block data.
  3203. * @param[in] size number of the blocksize.
  3204. *
  3205. * Function notes:
  3206. * Both inputs are in 1.7 format and multiplications yield a 2.14 result.
  3207. * The 2.14 intermediate results are accumulated in a 32-bit accumulator in
  3208. * 18.14 format. The 18.14 result is then converted to 18.7 format by
  3209. * discarding the low 7 bits and then saturated to 1.7 format.
  3210. */
  3211. static inline void hpm_dsp_fir_q7(const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t size)
  3212. {
  3213. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3214. riscv_dsp_fir_q7(instance, src, dst, size);
  3215. #endif
  3216. }
  3217. /**
  3218. * @brief Function for the floating-point lattice FIR filter.
  3219. * @param[in] *instance points to an instance of the lattice
  3220. * FIR structure.
  3221. * @param[in] *src points to the input block data.
  3222. * @param[out] *dst points to the output block data.
  3223. * @param[in] size number of the blocksize.
  3224. */
  3225. static inline void hpm_dsp_lfir_f32(const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
  3226. {
  3227. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3228. riscv_dsp_lfir_f32(instance, src, dst, size);
  3229. #endif
  3230. }
  3231. /**
  3232. * @brief Function for the q15 lattice FIR filter.
  3233. * @param[in] *instance points to an instance of the lattice
  3234. * FIR structure.
  3235. * @param[in] *src points to the input block data.
  3236. * @param[out] *dst points to the output block data.
  3237. * @param[in] size number of the blocksize.
  3238. */
  3239. static inline void hpm_dsp_lfir_q15(const riscv_dsp_lfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3240. {
  3241. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3242. riscv_dsp_lfir_q15(instance, src, dst, size);
  3243. #endif
  3244. }
  3245. /**
  3246. * @brief Function for the q31 lattice FIR filter.
  3247. * @param[in] *instance points to an instance of the lattice
  3248. * FIR structure.
  3249. * @param[in] *src points to the input block data.
  3250. * @param[out] *dst points to the output block data.
  3251. * @param[in] size number of the blocksize.
  3252. *
  3253. * Function notes:
  3254. * In order to avoid overflows the input signal must be scaled down by
  3255. * 2*log2(stage) bits.
  3256. */
  3257. static inline void hpm_dsp_lfir_q31(const riscv_dsp_lfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3258. {
  3259. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3260. riscv_dsp_lfir_q31(instance, src, dst, size);
  3261. #endif
  3262. }
  3263. static inline void hpm_dsp_dcmfir_f32(const riscv_dsp_dcmfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
  3264. {
  3265. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3266. riscv_dsp_dcmfir_f32(instance, src, dst, size);
  3267. #endif
  3268. }
  3269. static inline void hpm_dsp_dcmfir_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3270. {
  3271. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3272. riscv_dsp_dcmfir_q15(instance, src, dst, size);
  3273. #endif
  3274. }
  3275. static inline void hpm_dsp_dcmfir_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3276. {
  3277. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3278. riscv_dsp_dcmfir_q31(instance, src, dst, size);
  3279. #endif
  3280. }
  3281. static inline void hpm_dsp_dcmfir_fast_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3282. {
  3283. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3284. riscv_dsp_dcmfir_fast_q31(instance, src, dst, size);
  3285. #endif
  3286. }
  3287. static inline void hpm_dsp_dcmfir_fast_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3288. {
  3289. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3290. riscv_dsp_dcmfir_fast_q15(instance, src, dst, size);
  3291. #endif
  3292. }
  3293. static inline void hpm_dsp_upsplfir_f32(const riscv_dsp_upsplfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
  3294. {
  3295. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3296. riscv_dsp_upsplfir_f32(instance, src, dst, size);
  3297. #endif
  3298. }
  3299. static inline void hpm_dsp_upsplfir_q15(const riscv_dsp_upsplfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3300. {
  3301. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3302. riscv_dsp_upsplfir_q15(instance, src, dst, size);
  3303. #endif
  3304. }
  3305. static inline void hpm_dsp_upsplfir_q31(const riscv_dsp_upsplfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3306. {
  3307. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3308. riscv_dsp_upsplfir_q31(instance, src, dst, size);
  3309. #endif
  3310. }
  3311. static inline void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t size)
  3312. {
  3313. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3314. riscv_dsp_spafir_f32(instance, src, dst, buf, size);
  3315. #endif
  3316. }
  3317. static inline void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t size)
  3318. {
  3319. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3320. riscv_dsp_spafir_q15(instance, src, dst, buf1, buf2, size);
  3321. #endif
  3322. }
  3323. static inline void hpm_dsp_spafir_q31(riscv_dsp_spafir_q31_t *instance, q31_t *src, q31_t *dst, q31_t *buf, uint32_t size)
  3324. {
  3325. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3326. riscv_dsp_spafir_q31(instance, src, dst, buf, size);
  3327. #endif
  3328. }
  3329. static inline void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t size)
  3330. {
  3331. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3332. riscv_dsp_spafir_q7(instance, src, dst, buf1, buf2, size);
  3333. #endif
  3334. }
  3335. // Standard LMS filter
  3336. /**
  3337. * @brief Structure for the floating-point standard LMS Filters.
  3338. */
  3339. /**
  3340. * @brief Function for the floating-point LMS filter.
  3341. * @param[in] *instance points to an instance of the LMS structure.
  3342. * @param[in] *src points to the input block data.
  3343. * @param[in] *ref points to the reference data.
  3344. * @param[out] *dst points to the output data.
  3345. * @param[out] *err points to the error data.
  3346. * @param[in] size number of the blocksize.
  3347. */
  3348. static inline void hpm_dsp_lms_f32(const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
  3349. {
  3350. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3351. riscv_dsp_lms_f32(instance, src, ref, dst, err, size);
  3352. #endif
  3353. }
  3354. /**
  3355. * @brief Function for the q31 LMS filter.
  3356. * @param[in] *instance points to an instance of the LMS structure.
  3357. * @param[in] *src points to the input block data.
  3358. * @param[in] *ref points to the reference data.
  3359. * @param[out] *dst points to the output data.
  3360. * @param[out] *err points to the error data.
  3361. * @param[in] size number of the blocksize.
  3362. *
  3363. * Function notes:
  3364. * Both coefficients and state variables are represented in 1.31 format
  3365. * and multiplications yield a 2.62 result. The 2.62 results are accumulated
  3366. * in a 64-bit accumulator and is right shifted by 31 bits and saturated to
  3367. * 1.31 formatthe to yield the final result. In order to avoid overflows
  3368. * completely the input signal must be scaled down by log2(coeff_size) bits.
  3369. */
  3370. static inline void hpm_dsp_lms_q31(const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
  3371. {
  3372. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3373. riscv_dsp_lms_q31(instance, src, ref, dst, err, size);
  3374. #endif
  3375. }
  3376. /**
  3377. * @brief Function for the q15 LMS filter.
  3378. * @param[in] *instance points to an instance of the LMS structure.
  3379. * @param[in] *src points to the input block data.
  3380. * @param[in] *ref points to the reference data.
  3381. * @param[out] *dst points to the output data.
  3382. * @param[out] *err points to the error data.
  3383. * @param[in] size number of the blocksize.
  3384. *
  3385. * Function notes:
  3386. * Both coefficients and state variables are represented in 1.15 format
  3387. * and multiplications yield a 2.30 result. The 2.30 results are accumulated
  3388. * in a 64-bit accumulator in 34.30 format and the results is truncated
  3389. * to 34.15 format by discarding low 15 bits. Lastly, the outputs is
  3390. * saturated to yield a result in 1.15 format.
  3391. */
  3392. static inline void hpm_dsp_lms_q15(const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
  3393. {
  3394. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3395. riscv_dsp_lms_q15(instance, src, ref, dst, err, size);
  3396. #endif
  3397. }
  3398. /**
  3399. * @brief Structure for the f32 normalized LMS filter.
  3400. */
  3401. static inline void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
  3402. {
  3403. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3404. riscv_dsp_nlms_f32(instance, src, ref, dst, err, size);
  3405. #endif
  3406. }
  3407. /**
  3408. * @brief Structure for the q31 normalized LMS filter.
  3409. */
  3410. static inline void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
  3411. {
  3412. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3413. riscv_dsp_nlms_q31(instance, src, ref, dst, err, size);
  3414. #endif
  3415. }
  3416. static inline void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
  3417. {
  3418. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3419. riscv_dsp_nlms_q15(instance, src, ref, dst, err, size);
  3420. #endif
  3421. }
  3422. // Convolution
  3423. /**
  3424. * @brief Convolution of the floating-point vectors.
  3425. * @param[in] *src1 points to the first input vector.
  3426. * @param[in] len1 length of the first input vector.
  3427. * @param[in] *src2 points to the second input vector.
  3428. * @param[in] len2 length of the second input vector.
  3429. * @param[out] *dst points to the output vector where the length is
  3430. * len1 + len2 - 1.
  3431. */
  3432. static inline void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
  3433. {
  3434. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3435. #ifdef __zcc__
  3436. tpt_conv_f32(dst, src1, len1, src2, len2);
  3437. #else
  3438. riscv_dsp_conv_f32(src1, len1, src2, len2, dst);
  3439. #endif
  3440. #endif
  3441. }
  3442. /**
  3443. * @brief Convolution of the q15 vectors.
  3444. * @param[in] *src1 points to the first input vector.
  3445. * @param[in] len1 length of the first input vector.
  3446. * @param[in] *src2 points to the second input vector.
  3447. * @param[in] len2 length of the second input vector.
  3448. * @param[out] *dst points to the output vector where the length is
  3449. * len1 + len2 - 1.
  3450. *
  3451. * Function notes:
  3452. * Both inputs are in 1.15 format and multiplications yield a 2.30 result.
  3453. * The 2.30 intermediate results are accumulated in a 64-bit accumulator in
  3454. * 34.30 format. The 34.30 result is then truncated to 34.15 format by
  3455. * discarding the low 15 bits and then saturated to 1.15 format.
  3456. */
  3457. static inline void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
  3458. {
  3459. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3460. #ifdef __zcc__
  3461. tpt_conv_q15(dst, src1, len1, src2, len2);
  3462. #else
  3463. riscv_dsp_conv_q15(src1, len1, src2, len2, dst);
  3464. #endif
  3465. #endif
  3466. }
  3467. /**
  3468. * @brief Convolution of the q31 vectors.
  3469. * @param[in] *src1 points to the first input vector.
  3470. * @param[in] len1 length of the first input vector.
  3471. * @param[in] *src2 points to the second input vector.
  3472. * @param[in] len2 length of the second input vector.
  3473. * @param[out] *dst points to the output vector where the length is
  3474. * len1 + len2 - 1.
  3475. *
  3476. * Function notes:
  3477. * Both inputs are in 1.31 format and the 64-bit accumulator has a 2.62
  3478. * format and maintains full precision of the intermediate multiplication
  3479. * results but provides only a single guard bit. The input signals should be
  3480. * scaled down to avoid intermediate overflows. Scale down the inputs by
  3481. * log2(min(srcALen, srcBLen)), The 2.62 accumulator is right shifted by 31
  3482. * bits and saturated to 1.31 forma t to yield the final result.
  3483. */
  3484. static inline void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
  3485. {
  3486. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3487. #ifdef __zcc__
  3488. tpt_conv_q31(dst, src1, len1, src2, len2);
  3489. #else
  3490. riscv_dsp_conv_q31(src1, len1, src2, len2, dst);
  3491. #endif
  3492. #endif
  3493. }
  3494. /**
  3495. * @brief Convolution of the q7 vectors.
  3496. * @param[in] *src1 points to the first input vector.
  3497. * @param[in] len1 length of the first input vector.
  3498. * @param[in] *src2 points to the second input vector.
  3499. * @param[in] len2 length of the second input vector.
  3500. * @param[out] *dst points to the output vector where the length is
  3501. * len1 + len2 - 1.
  3502. *
  3503. * Function notes:
  3504. * Both inputs are in 1.7 format and multiplications yield a 2.14 result.
  3505. * The 2.14 intermediate results are accumulated in a 32-bit accumulator in
  3506. * 18.14 format. The 18.14 result is then truncated to 18.7 format by
  3507. * discarding the low 7 bits and then saturated to 1.7 format.
  3508. */
  3509. static inline void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
  3510. {
  3511. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3512. #ifdef __zcc__
  3513. tpt_conv_q7(dst, src1, len1, src2, len2);
  3514. #else
  3515. riscv_dsp_conv_q7(src1, len1, src2, len2, dst);
  3516. #endif
  3517. #endif
  3518. }
  3519. /**
  3520. * @brief Convolution Partial of the floating-point vectors.
  3521. * @param[in] *src1 points to the first input vector.
  3522. * @param[in] len1 length of the first input vector.
  3523. * @param[in] *src2 points to the second input vector.
  3524. * @param[in] len2 length of the second input vector.
  3525. * @param[out] *dst points to the output vector where the length is
  3526. * len1 + len2 - 1.
  3527. * @param[in] startindex is the first output sample to start with.
  3528. * @param[in] size is the number of output points to be computed.
  3529. * @return Returns
  3530. * 0; success
  3531. * -1; fail, the input subset are not between 0 and len1+len2-2.
  3532. */
  3533. static inline int32_t hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t size)
  3534. {
  3535. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3536. #ifdef __zcc__
  3537. return tpt_conv_partial_f32(dst, src1, len1, src2, len2, startindex, size);
  3538. #else
  3539. return riscv_dsp_conv_partial_f32(src1, len1, src2, len2, dst, startindex,
  3540. size);
  3541. #endif
  3542. #endif
  3543. }
  3544. /**
  3545. * @brief Convolution Partial of the q15 vectors.
  3546. * @param[in] *src1 points to the first input vector.
  3547. * @param[in] len1 length of the first input vector.
  3548. * @param[in] *src2 points to the second input vector.
  3549. * @param[in] len2 length of the second input vector.
  3550. * @param[out] *dst points to the output vector where the length is
  3551. * len1 + len2 - 1.
  3552. * @param[in] startindex is the first output sample to start with.
  3553. * @param[in] size is the number of output points to be computed.
  3554. * @return Returns
  3555. * 0; success
  3556. * -1; fail, the input subset are not between 0 and len1+len2-2.
  3557. */
  3558. static inline int32_t hpm_dsp_conv_partial_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst, uint32_t startindex, uint32_t size)
  3559. {
  3560. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3561. #ifdef __zcc__
  3562. return tpt_conv_partial_q15(dst, src1, len1, src2, len2, startindex, size);
  3563. #else
  3564. return riscv_dsp_conv_partial_q15(src1, len1, src2, len2, dst, startindex,
  3565. size);
  3566. #endif
  3567. #endif
  3568. }
  3569. /**
  3570. * @brief Convolution Partial of the q31 vectors.
  3571. * @param[in] *src1 points to the first input vector.
  3572. * @param[in] len1 length of the first input vector.
  3573. * @param[in] *src2 points to the second input vector.
  3574. * @param[in] len2 length of the second input vector.
  3575. * @param[out] *dst points to the output vector where the length is
  3576. * len1 + len2 - 1.
  3577. * @param[in] startindex is the first output sample to start with.
  3578. * @param[in] size is the number of output points to be computed.
  3579. * @return Returns
  3580. * 0; success
  3581. * -1; fail, the input subset are not between 0 and len1+len2-2.
  3582. */
  3583. static inline int32_t hpm_dsp_conv_partial_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst, uint32_t startindex, uint32_t size)
  3584. {
  3585. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3586. #ifdef __zcc__
  3587. return tpt_conv_partial_q31(dst, src1, len1, src2, len2, startindex, size);
  3588. #else
  3589. return riscv_dsp_conv_partial_q31(src1, len1, src2, len2, dst, startindex,
  3590. size);
  3591. #endif
  3592. #endif
  3593. }
  3594. /**
  3595. * @brief Convolution Partial of the q7 vectors.
  3596. * @param[in] *src1 points to the first input vector.
  3597. * @param[in] len1 length of the first input vector.
  3598. * @param[in] *src2 points to the second input vector.
  3599. * @param[in] len2 length of the second input vector.
  3600. * @param[out] *dst points to the output vector where the length is
  3601. * len1 + len2 - 1.
  3602. * @param[in] startindex is the first output sample to start with.
  3603. * @param[in] size is the number of output points to be computed.
  3604. * @return Returns
  3605. * 0; success
  3606. * -1; fail, the input subset are not between 0 and len1+len2-2.
  3607. */
  3608. static inline int32_t hpm_dsp_conv_partial_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst, uint32_t startindex, uint32_t size)
  3609. {
  3610. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3611. #ifdef __zcc__
  3612. return tpt_conv_partial_q7(dst, src1, len1, src2, len2, startindex, size);
  3613. #else
  3614. return riscv_dsp_conv_partial_q7(src1, len1, src2, len2, dst, startindex,
  3615. size);
  3616. #endif
  3617. #endif
  3618. }
  3619. // Correlation
  3620. /**
  3621. * @brief Correlation of the floating-point vectors.
  3622. * @param[in] *src1 points to the first input vector.
  3623. * @param[in] len1 length of the first input vector.
  3624. * @param[in] *src2 points to the second input vector.
  3625. * @param[in] len2 length of the second input vector.
  3626. * @param[out] *dst points to the output vector where the length is
  3627. * 2 * max(len1, len2) - 1.
  3628. */
  3629. static inline void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
  3630. {
  3631. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3632. #ifdef __zcc__
  3633. tpt_correlate_f32(dst, src1, len1, src2, len2);
  3634. #else
  3635. riscv_dsp_corr_f32(src1, len1, src2, len2, dst);
  3636. #endif
  3637. #endif
  3638. }
  3639. /**
  3640. * @brief Correlation of the q15 vectors.
  3641. * @param[in] *src1 points to the first input vector.
  3642. * @param[in] len1 length of the first input vector.
  3643. * @param[in] *src2 points to the second input vector.
  3644. * @param[in] len2 length of the second input vector.
  3645. * @param[out] *dst points to the output vector where the length is
  3646. * 2 * max(len1, len2) - 1.
  3647. *
  3648. * Function notes:
  3649. * Both inputs are in 1.15 format and multiplications yield a 2.30 result.
  3650. * The 2.30 intermediate results are accumulated in a 64-bit accumulator in
  3651. * 34.30 format. The 34.30 result is then truncated to 34.15 format by
  3652. * discarding the low 15 bits and then saturated to 1.15 format.
  3653. */
  3654. static inline void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
  3655. {
  3656. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3657. #ifdef __zcc__
  3658. tpt_correlate_q15(dst, src1, len1, src2, len2);
  3659. #else
  3660. riscv_dsp_corr_q15(src1, len1, src2, len2, dst);
  3661. #endif
  3662. #endif
  3663. }
  3664. /**
  3665. * @brief Convolution of the q31 vectors.
  3666. * @param[in] *src1 points to the first input vector.
  3667. * @param[in] len1 length of the first input vector.
  3668. * @param[in] *src2 points to the second input vector.
  3669. * @param[in] len2 length of the second input vector.
  3670. * @param[out] *dst points to the output vector where the length is
  3671. * len1 + len2 - 1.
  3672. *
  3673. * Function notes:
  3674. * Both inputs are in 1.31 format and the 64-bit accumulator has a 2.62
  3675. * format and maintains full precision of the intermediate multiplication
  3676. * results but provides only a single guard bit. The input signals should be
  3677. * scaled down to avoid intermediate overflows. Scale down one of the inputs
  3678. * by <code>1/min(srcALen, srcBLen)</code> to avoid overflows since a
  3679. * maximum of <code>min(srcALen, srcBLen)</code> number of additions is
  3680. * carried internally. The 2.62 accumulator is right shifted by 31 bits and
  3681. * saturated to 1.31 forma t to yield the final result.
  3682. */
  3683. static inline void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
  3684. {
  3685. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3686. #ifdef __zcc__
  3687. tpt_correlate_q31(dst, src1, len1, src2, len2);
  3688. #else
  3689. riscv_dsp_corr_q31(src1, len1, src2, len2, dst);
  3690. #endif
  3691. #endif
  3692. }
  3693. /**
  3694. * @brief Correlation of the q7 vectors.
  3695. * @param[in] *src1 points to the first input vector.
  3696. * @param[in] len1 length of the first input vector.
  3697. * @param[in] *src2 points to the second input vector.
  3698. * @param[in] len2 length of the second input vector.
  3699. * @param[out] *dst points to the output vector where the length is
  3700. * 2 * max(len1, len2) - 1.
  3701. *
  3702. * Function notes:
  3703. * Both inputs are in 1.7 format and multiplications yield a 2.14 result.
  3704. * The 2.14 intermediate results are accumulated in a 32-bit accumulator in
  3705. * 18.14 format. The 18.14 result is then truncated to 18.7 format by
  3706. * discarding the low 7 bits and then saturated to 1.7 format.
  3707. */
  3708. static inline void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
  3709. {
  3710. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3711. #ifdef __zcc__
  3712. tpt_correlate_q7(dst, src1, len1, src2, len2);
  3713. #else
  3714. riscv_dsp_corr_q7(src1, len1, src2, len2, dst);
  3715. #endif
  3716. #endif
  3717. }
  3718. static inline void hpm_dsp_bq_df1_f32(const riscv_dsp_bq_df1_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
  3719. {
  3720. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3721. riscv_dsp_bq_df1_f32(instance, src, dst, size);
  3722. #endif
  3723. }
  3724. static inline void hpm_dsp_bq_df1_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3725. {
  3726. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3727. riscv_dsp_bq_df1_q15(instance, src, dst, size);
  3728. #endif
  3729. }
  3730. static inline void hpm_dsp_bq_df1_fast_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3731. {
  3732. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3733. riscv_dsp_bq_df1_fast_q15(instance, src, dst, size);
  3734. #endif
  3735. }
  3736. static inline void hpm_dsp_bq_df1_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3737. {
  3738. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3739. riscv_dsp_bq_df1_q31(instance, src, dst, size);
  3740. #endif
  3741. }
  3742. static inline void hpm_dsp_bq_df1_fast_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3743. {
  3744. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3745. riscv_dsp_bq_df1_fast_q31(instance, src, dst, size);
  3746. #endif
  3747. }
  3748. static inline void hpm_dsp_bq_df1_32x64_q31(const riscv_dsp_bq_df1_32x64_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3749. {
  3750. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3751. riscv_dsp_bq_df1_32x64_q31(instance, src, dst, size);
  3752. #endif
  3753. }
  3754. static inline void hpm_dsp_bq_df2T_f32(const riscv_dsp_bq_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
  3755. {
  3756. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3757. riscv_dsp_bq_df2T_f32(instance, src, dst, size);
  3758. #endif
  3759. }
  3760. static inline void hpm_dsp_bq_df2T_f64(const riscv_dsp_bq_df2T_f64_t *instance, float64_t *src, float64_t *dst, uint32_t size)
  3761. {
  3762. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3763. riscv_dsp_bq_df2T_f64(instance, src, dst, size);
  3764. #endif
  3765. }
  3766. static inline void hpm_dsp_bq_stereo_df2T_f32(const riscv_dsp_bq_stereo_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
  3767. {
  3768. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3769. riscv_dsp_bq_stereo_df2T_f32(instance, src, dst, size);
  3770. #endif
  3771. }
  3772. static inline void hpm_dsp_liir_f32(const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
  3773. {
  3774. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3775. riscv_dsp_liir_f32(instance, src, dst, size);
  3776. #endif
  3777. }
  3778. static inline void hpm_dsp_liir_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3779. {
  3780. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3781. riscv_dsp_liir_q31(instance, src, dst, size);
  3782. #endif
  3783. }
  3784. static inline void hpm_dsp_liir_fast_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
  3785. {
  3786. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3787. riscv_dsp_liir_fast_q31(instance, src, dst, size);
  3788. #endif
  3789. }
  3790. static inline void hpm_dsp_liir_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3791. {
  3792. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3793. riscv_dsp_liir_q15(instance, src, dst, size);
  3794. #endif
  3795. }
  3796. static inline void hpm_dsp_liir_fast_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
  3797. {
  3798. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3799. riscv_dsp_liir_fast_q15(instance, src, dst, size);
  3800. #endif
  3801. }
  3802. #endif
  3803. #endif
  3804. /**
  3805. * @}
  3806. *
  3807. */
  3808. #ifdef HPM_MATH_DSP_MATRIX
  3809. /**
  3810. * @defgroup matrix DSP Matrix Functions
  3811. *
  3812. * This set of functions provides basic matrix math operations.
  3813. * The function specifies the size of the matrix and then points to an array.
  3814. * For example,
  3815. * the function definition for the floating-point is shown below:
  3816. * <pre>
  3817. * void riscv_dsp_funcname_f32(const float32_t *src1,
  3818. * const float32_t *src2,
  3819. * float32_t *dst,
  3820. * uint32_t row,
  3821. * uint32_t col,
  3822. * uint32_t row2,
  3823. * uint32_t col2)
  3824. * </pre>
  3825. * where the arguments describe two matrices: matrix 1 is a
  3826. * <code>row * col</code> matrix and matrix 2 is a
  3827. * <code>row2 * col2</code> matrix; the size of the output matrix depends on
  3828. * the math operation. There are similar definitions for Q15 and Q31 data types.
  3829. * @ingroup hpmmath
  3830. * @{
  3831. */
  3832. #ifdef HPM_EN_MATH_DSP_LIB
  3833. #ifdef __zcc__
  3834. #include "tpt_math.h"
  3835. #endif
  3836. #include "riscv_dsp_matrix_math.h"
  3837. // Matrix Addition
  3838. /**
  3839. * @brief Addition of two floating-potint matrices.
  3840. * @param[in] *src1 points to the first input matrix.
  3841. * @param[in] *src2 points to the second input matrix.
  3842. * @param[out] *dst points to the output matrix.
  3843. * @param[in] row number of the matrix rows.
  3844. * @param[in] col number of the matrix columns.
  3845. */
  3846. static inline void hpm_dsp_mat_add_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
  3847. {
  3848. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3849. #ifdef __zcc__
  3850. tpt_mat_add_f32(dst, src1, src2, row, col);
  3851. #else
  3852. riscv_dsp_mat_add_f32(src1, src2, dst, row, col);
  3853. #endif
  3854. #endif
  3855. }
  3856. /**
  3857. * @brief Addition of two q15 matrices.
  3858. * @param[in] *src1 points to the first input matrix.
  3859. * @param[in] *src2 points to the second input matrix.
  3860. * @param[out] *dst points to the output matrix.
  3861. * @param[in] row number of the matrix rows.
  3862. * @param[in] col number of the matrix columns.
  3863. *
  3864. * The output results will be saturated in Q15 range [0x8000 0x7FFF].
  3865. */
  3866. static inline void hpm_dsp_mat_add_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
  3867. {
  3868. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3869. #ifdef __zcc__
  3870. tpt_mat_add_q15(dst, src1, src2, row, col);
  3871. #else
  3872. riscv_dsp_mat_add_q15(src1, src2, dst, row, col);
  3873. #endif
  3874. #endif
  3875. }
  3876. /**
  3877. * @brief Addition of two q31 matrices.
  3878. * @param[in] *src1 points to the first input matrix.
  3879. * @param[in] *src2 points to the second input matrix.
  3880. * @param[out] *dst points to the output matrix.
  3881. * @param[in] row number of the matrix rows.
  3882. * @param[in] col number of the matrix columns.
  3883. *
  3884. * Ouput results will be saturated in Q31 range [0x80000000 0x7FFFFFFF].
  3885. */
  3886. static inline void hpm_dsp_mat_add_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
  3887. {
  3888. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3889. #ifdef __zcc__
  3890. tpt_mat_add_q31(dst, src1, src2, row, col);
  3891. #else
  3892. riscv_dsp_mat_add_q31(src1, src2, dst, row, col);
  3893. #endif
  3894. #endif
  3895. }
  3896. // Matrix Inverse
  3897. /**
  3898. * @brief Compute the inverse matrix of the floating-potint matrix.
  3899. * @param[in] *src points to the input matrix.
  3900. * @param[out] *dst points to the output matrix.
  3901. * @param[in] size number of the matrix row or column.
  3902. * @return the inverse process success or not.
  3903. */
  3904. static inline int32_t hpm_dsp_mat_inv_f32(float32_t *src, float32_t *dst, uint32_t size)
  3905. {
  3906. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3907. #ifdef __zcc__
  3908. return tpt_mat_inverse_f32(dst, src, size);
  3909. #else
  3910. return riscv_dsp_mat_inv_f32(src, dst, size);
  3911. #endif
  3912. #endif
  3913. }
  3914. static inline int32_t hpm_dsp_mat_inv_f64(float64_t *src, float64_t *dst, uint32_t size)
  3915. {
  3916. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3917. #ifdef __zcc__
  3918. return tpt_mat_inverse_f64(dst, src, size);
  3919. #else
  3920. return riscv_dsp_mat_inv_f64(src, dst, size);
  3921. #endif
  3922. #endif
  3923. }
  3924. // Matrix Multiplication
  3925. /**
  3926. * @brief Multiplication of two floating-point matrices.
  3927. * @param[in] *src1 points to the first input matrix.
  3928. * @param[in] *src2 points to the second input matrix.
  3929. * @param[out] *dst points to the output matrix.
  3930. * @param[in] row number of the first input matrix rows.
  3931. * @param[in] col number of the first input matrix columns.
  3932. * @param[in] col2 number of the second input matrix columns.
  3933. */
  3934. static inline void hpm_dsp_mat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  3935. {
  3936. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3937. #ifdef __zcc__
  3938. return tpt_mat_mult_f32(dst, src1, src2, row, col, col2);
  3939. #else
  3940. riscv_dsp_mat_mul_f32(src1, src2, dst, row, col, col2);
  3941. #endif
  3942. #endif
  3943. }
  3944. static inline void hpm_dsp_mat_mul_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  3945. {
  3946. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3947. #ifdef __zcc__
  3948. return tpt_mat_mult_f64(dst, src1, src2, row, col, col2);
  3949. #else
  3950. riscv_dsp_mat_mul_f64(src1, src2, dst, row, col, col2);
  3951. #endif
  3952. #endif
  3953. }
  3954. /**
  3955. * @brief Multiplication of two floating-point complex matrices.
  3956. * @param[in] *src1 points to the first input complex matrix.
  3957. * @param[in] *src2 points to the second input complex matrix.
  3958. * @param[out] *dst points to the output complex matrix.
  3959. * @param[in] row number of the first input matrix rows.
  3960. * @param[in] col number of the first input matrix columns.
  3961. * @param[in] col2 number of the second input matrix columns.
  3962. */
  3963. static inline void hpm_dsp_cmat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  3964. {
  3965. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3966. #ifdef __zcc__
  3967. return tpt_mat_cmplx_mult_f32(dst, src1, src2, row, col, col2);
  3968. #else
  3969. riscv_dsp_cmat_mul_f32(src1, src2, dst, row, col, col2);
  3970. #endif
  3971. #endif
  3972. }
  3973. /**
  3974. * @brief Multiplication of two q15 matrices.
  3975. * @param[in] *src1 points to the first input matrix.
  3976. * @param[in] *src2 points to the second input matrix.
  3977. * @param[out] *dst points to the output matrix.
  3978. * @param[in] row number of the first input matrix rows.
  3979. * @param[in] col number of the first input matrix columns.
  3980. * @param[in] col2 number of the second input matrix columns.
  3981. *
  3982. * <b>Function notes:</b>
  3983. *
  3984. * The 1.15 format input is multiplied yields a 2.30 format, and then added
  3985. * without saturation to a 64-bit accumulator in 34.30 format. Finally,
  3986. * the added output is truncated to 34.15 format by discarding the lower 15
  3987. * bits, and then saturated to yield a result in 1.15 format.
  3988. */
  3989. static inline void hpm_dsp_mat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  3990. {
  3991. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  3992. #ifdef __zcc__
  3993. return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
  3994. #else
  3995. riscv_dsp_mat_mul_q15(src1, src2, dst, row, col, col2);
  3996. #endif
  3997. #endif
  3998. }
  3999. static inline void hpm_dsp_mat_mul_fast_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  4000. {
  4001. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4002. #ifdef __zcc__
  4003. return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
  4004. #else
  4005. riscv_dsp_mat_mul_fast_q15(src1, src2, dst, row, col, col2);
  4006. #endif
  4007. #endif
  4008. }
  4009. /**
  4010. * @brief Multiplication of two q15 complex matrices.
  4011. * @param[in] *src1 points to the first input complex matrix.
  4012. * @param[in] *src2 points to the second input complex matrix.
  4013. * @param[out] *dst points to the output complex matrix.
  4014. * @param[in] row number of the first input matrix rows.
  4015. * @param[in] col number of the first input matrix columns.
  4016. * @param[in] col2 number of the second input matrix columns.
  4017. *
  4018. * <b>Function notes:</b>
  4019. *
  4020. * The 1.15 format input is multiplied yields a 2.30 format, and then added
  4021. * without saturation to a 64-bit accumulator in 34.30 format. Finally,
  4022. * the added output is truncated to 34.15 format by discarding the lower 15
  4023. * bits, and then saturated to yield a result in 1.15 format.
  4024. */
  4025. static inline void hpm_dsp_cmat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  4026. {
  4027. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4028. #ifdef __zcc__
  4029. return tpt_mat_cmplx_mult_q15(dst, src1, src2, row, col, col2);
  4030. #else
  4031. riscv_dsp_cmat_mul_q15(src1, src2, dst, row, col, col2);
  4032. #endif
  4033. #endif
  4034. }
  4035. /**
  4036. * @brief Multiplication of two q31 matrices.
  4037. * @param[in] *src1 points to the first input matrix.
  4038. * @param[in] *src2 points to the second input matrix.
  4039. * @param[out] *dst points to the output matrix.
  4040. * @param[in] row number of the first input matrix rows.
  4041. * @param[in] col number of the first input matrix columns.
  4042. * @param[in] col2 number of the second input matrix columns.
  4043. *
  4044. * <b>Function notes:</b>
  4045. *
  4046. * The 1.31 format input is multiplied yields a 2.62 format. In order to
  4047. * avoid overflows, the input signal must be scaled down by
  4048. * <code>log2(col)</code> bits, Finally, the 2.62 accumulator is right
  4049. * shifted by 31 bits to yield a 1.31 format value.
  4050. */
  4051. static inline void hpm_dsp_mat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  4052. {
  4053. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4054. #ifdef __zcc__
  4055. return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
  4056. #else
  4057. riscv_dsp_mat_mul_q31(src1, src2, dst, row, col, col2);
  4058. #endif
  4059. #endif
  4060. }
  4061. static inline void hpm_dsp_mat_mul_fast_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  4062. {
  4063. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4064. #ifdef __zcc__
  4065. return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
  4066. #else
  4067. riscv_dsp_mat_mul_fast_q31(src1, src2, dst, row, col, col2);
  4068. #endif
  4069. #endif
  4070. }
  4071. /**
  4072. * @brief Multiplication of two q31 complex matrices.
  4073. * @param[in] *src1 points to the first input complex matrix.
  4074. * @param[in] *src2 points to the second input complex matrix.
  4075. * @param[out] *dst points to the output complex matrix.
  4076. * @param[in] row number of the first input matrix rows.
  4077. * @param[in] col number of the first input matrix columns.
  4078. * @param[in] col2 number of the second input matrix columns.
  4079. *
  4080. * <b>Function notes:</b>
  4081. *
  4082. * The 1.31 format input is multiplied yields a 2.62 format. In order to
  4083. * avoid overflows, the input signal must be scaled down by
  4084. * <code>log2(col)</code> bits, Finally, the 2.62 accumulator is right
  4085. * shifted by 31 bits to yield a 1.31 format value.
  4086. */
  4087. static inline void hpm_dsp_cmat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  4088. {
  4089. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4090. #ifdef __zcc__
  4091. return tpt_mat_cmplx_mult_q31(dst, src1, src2, row, col, col2);
  4092. #else
  4093. riscv_dsp_cmat_mul_q31(src1, src2, dst, row, col, col2);
  4094. #endif
  4095. #endif
  4096. }
  4097. /**
  4098. * @brief Multiplication of two q7 matrices.
  4099. * @param[in] *src1 points to the first input matrix.
  4100. * @param[in] *src2 points to the second input matrix.
  4101. * @param[out] *dst points to the output matrix.
  4102. * @param[in] row number of the first input matrix rows.
  4103. * @param[in] col number of the first input matrix columns.
  4104. * @param[in] col2 number of the second input matrix columns.
  4105. *
  4106. * <b>Function notes:</b>
  4107. *
  4108. * The 1.7 format input is multiplied yields a 2.15 format, and then added
  4109. * without saturation to a 32-bit accumulator in 17.15 format. Finally,
  4110. * the added output is truncated to 17.7 format by discarding the lower 7
  4111. * bits, and then saturated to yield a result in 1.7 format.
  4112. */
  4113. static inline void hpm_dsp_mat_mul_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
  4114. {
  4115. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4116. riscv_dsp_mat_mul_q7(src1, src2, dst, row, col, col2);
  4117. #endif
  4118. }
  4119. /**
  4120. * @brief Multiplication of q7 vetor by matrix.
  4121. * @param[in] *src1 points to the first input vector.
  4122. * @param[in] *src2 points to the second input matrix.
  4123. * @param[out] *dst points to the output vector.
  4124. * @param[in] col number of the first input vector columns.
  4125. * @param[in] col2 number of the second input matrix columns.
  4126. *
  4127. * <b>Function notes:</b>
  4128. *
  4129. * The 1.7 format input is multiplied yields a 2.15 format, and then added
  4130. * without saturation to a 32-bit accumulator in 17.15 format. Finally,
  4131. * the added output is truncated to 17.7 format by discarding the lower 7
  4132. * bits, and then saturated to yield a result in 1.7 format.
  4133. */
  4134. static inline void hpm_dsp_mat_mul_vxm_q7(const q7_t * src1, const q7_t * src2, q7_t * dst, uint32_t col, uint32_t col2)
  4135. {
  4136. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4137. #ifdef __zcc__
  4138. tpt_mat_mul_mxv_q7(dst, src1, src2, col, col2);
  4139. #else
  4140. riscv_dsp_mat_mul_vxm_q7(src1, src2, dst, col, col2);
  4141. #endif
  4142. #endif
  4143. }
  4144. // Matrix Power 2 Function
  4145. //
  4146. // The input is a square matrix for riscv_dsp_mat_pow2_cache_f64.
  4147. static inline int32_t hpm_dsp_mat_pwr2_cache_f64(const float64_t *src, float64_t *dst, uint32_t size)
  4148. {
  4149. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4150. return riscv_dsp_mat_pwr2_cache_f64(src, dst, size);
  4151. #endif
  4152. }
  4153. // Matrix Scale
  4154. /**
  4155. * @brief Multiplt a scale value of floating-potint matrix.
  4156. * @param[in] *src points to the input matrix.
  4157. * @param[in] scale is the factor to be multiplied.
  4158. * @param[out] *dst points to the output matrix.
  4159. * @param[in] row number of the matrix rows.
  4160. * @param[in] col number of the matrix columns.
  4161. */
  4162. static inline void hpm_dsp_mat_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
  4163. {
  4164. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4165. #ifdef __zcc__
  4166. tpt_mat_scale_f32(dst, src, row, col, scale);
  4167. #else
  4168. riscv_dsp_mat_scale_f32(src, scale, dst, row, col);
  4169. #endif
  4170. #endif
  4171. }
  4172. /**
  4173. * @brief Multiplt a scale value of q15 matrix.
  4174. * @param[in] *src points to the input matrix.
  4175. * @param[in] scale_fract fractional multiplication.
  4176. * @param[in] shift arithmetic shift.
  4177. * @param[out] *dst points to the output matrix.
  4178. * @param[in] row number of the matrix rows.
  4179. * @param[in] col number of the matrix columns.
  4180. *
  4181. * <b>Function notes:</b>
  4182. *
  4183. * The 1.15 format inputs are multiplied to yield a 2.30 intermediate result
  4184. * and this is shifted with saturation to 1.15 format.
  4185. */
  4186. static inline void hpm_dsp_mat_scale_q15(const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
  4187. {
  4188. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4189. #ifdef __zcc__
  4190. tpt_mat_scale_q15(dst, src, row, col, scale_fract, shift);
  4191. #else
  4192. riscv_dsp_mat_scale_q15(src, scale_fract, shift, dst, row, col);
  4193. #endif
  4194. #endif
  4195. }
  4196. /**
  4197. * @brief Multiplt a scale value of q31 matrix.
  4198. * @param[in] *src points to the input matrix.
  4199. * @param[in] scale_fract fractional multiplication.
  4200. * @param[in] shift arithmetic shift.
  4201. * @param[out] *dst points to the output matrix.
  4202. * @param[in] row number of the matrix rows.
  4203. * @param[in] col number of the matrix columns.
  4204. *
  4205. * <b>Function notes:</b>
  4206. *
  4207. * The 1.31 format input are multiplied to yield a 2.62 intermediate result
  4208. * and this is shifted with saturation to 1.31 format.
  4209. */
  4210. static inline void hpm_dsp_mat_scale_q31(const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
  4211. {
  4212. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4213. #ifdef __zcc__
  4214. tpt_mat_scale_q31(dst, src, row, col, scale_fract, shift);
  4215. #else
  4216. riscv_dsp_mat_scale_q31(src, scale_fract, shift, dst, row, col);
  4217. #endif
  4218. #endif
  4219. }
  4220. // Matrix Subtraction
  4221. /**
  4222. * @brief Substraction of two double-precision floating-potint matrices.
  4223. * @param[in] src1 pointer of the first input matrix
  4224. * @param[in] src2 pointer of the second input matrix
  4225. * @param[out] dst pointer of the output matrix
  4226. * @param[in] row number of rows in a matrix
  4227. * @param[in] col number of columns in a matrix
  4228. *
  4229. */
  4230. static inline void hpm_dsp_mat_sub_f64(const float64_t *src1, const float64_t *src2,
  4231. float64_t *dst, uint32_t row, uint32_t col)
  4232. {
  4233. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4234. #ifdef __zcc__
  4235. tpt_mat_sub_f64(dst, src1, src2, row, col);
  4236. #else
  4237. riscv_dsp_mat_sub_f64(src1, src2, dst, row, col);
  4238. #endif
  4239. #endif
  4240. }
  4241. /**
  4242. * @brief Substraction of two floating-potint matrices.
  4243. * @param[in] *src1 points to the first input matrix.
  4244. * @param[in] *src2 points to the second input matrix.
  4245. * @param[out] *dst points to the output matrix.
  4246. * @param[in] row number of the matrix rows.
  4247. * @param[in] col number of the matrix columns.
  4248. */
  4249. static inline void hpm_dsp_mat_sub_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
  4250. {
  4251. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4252. #ifdef __zcc__
  4253. tpt_mat_sub_f32(dst, src1, src2, row, col);
  4254. #else
  4255. riscv_dsp_mat_sub_f32(src1, src2, dst, row, col);
  4256. #endif
  4257. #endif
  4258. }
  4259. /**
  4260. * @brief Substraction of two q15 matrices.
  4261. * @param[in] *src1 points to the first input matrix.
  4262. * @param[in] *src2 points to the second input matrix.
  4263. * @param[out] *dst points to the output matrix.
  4264. * @param[in] row number of the matrix rows.
  4265. * @param[in] col number of the matrix columns.
  4266. *
  4267. * The output results will be saturated in Q15 range [0x8000 0x7FFF].
  4268. */
  4269. static inline void hpm_dsp_mat_sub_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
  4270. {
  4271. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4272. #ifdef __zcc__
  4273. tpt_mat_sub_q15(dst, src1, src2, row, col);
  4274. #else
  4275. riscv_dsp_mat_sub_q15(src1, src2, dst, row, col);
  4276. #endif
  4277. #endif
  4278. }
  4279. /**
  4280. * @brief Substraction of two q31 matrices.
  4281. * @param[in] *src1 points to the first input matrix.
  4282. * @param[in] *src2 points to the second input matrix.
  4283. * @param[out] *dst points to the output matrix.
  4284. * @param[in] row number of the matrix rows.
  4285. * @param[in] col number of the matrix columns.
  4286. *
  4287. * Ouput results will be saturated in Q31 range [0x80000000 0x7FFFFFFF].
  4288. */
  4289. static inline void hpm_dsp_mat_sub_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
  4290. {
  4291. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4292. #ifdef __zcc__
  4293. tpt_mat_sub_q31(dst, src1, src2, row, col);
  4294. #else
  4295. riscv_dsp_mat_sub_q31(src1, src2, dst, row, col);
  4296. #endif
  4297. #endif
  4298. }
  4299. // Matrix Transpose
  4300. /**
  4301. * @brief Transpose the double-precision floating-potint matrices.
  4302. * @param[in] src pointer of the input matrix
  4303. * @param[out] dst pointer of the output matrix
  4304. * @param[in] row number of rows in a matrix
  4305. * @param[in] col number of columns in a matrix
  4306. *
  4307. */
  4308. static inline void hpm_dsp_mat_trans_f64(const float64_t *src, float64_t *dst, uint32_t row, uint32_t col)
  4309. {
  4310. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4311. #ifdef __zcc__
  4312. tpt_mat_trans_f64(dst, src, row, col);
  4313. #else
  4314. riscv_dsp_mat_trans_f64(src, dst, row, col);
  4315. #endif
  4316. #endif
  4317. }
  4318. /**
  4319. * @brief Transpose the floating-potint matricex.
  4320. * @param[in] *src points to the input matrix.
  4321. * @param[out] *dst points to the output matrix.
  4322. * @param[in] row number of the matrix rows.
  4323. * @param[in] col number of the matrix columns.
  4324. */
  4325. static inline void hpm_dsp_mat_trans_f32(const float32_t *src, float32_t *dst, uint32_t row, uint32_t col)
  4326. {
  4327. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4328. riscv_dsp_mat_trans_f32(src, dst, row, col);
  4329. #endif
  4330. }
  4331. /**
  4332. * @brief Transpose the q15 matricex.
  4333. * @param[in] *src points to the input matrix.
  4334. * @param[out] *dst points to the output matrix.
  4335. * @param[in] row number of the matrix rows.
  4336. * @param[in] col number of the matrix columns.
  4337. */
  4338. static inline void hpm_dsp_mat_trans_q15(const q15_t *src, q15_t *dst, uint32_t row, uint32_t col)
  4339. {
  4340. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4341. #ifdef __zcc__
  4342. tpt_mat_trans_q15(dst, src, row, col);
  4343. #else
  4344. riscv_dsp_mat_trans_q15(src, dst, row, col);
  4345. #endif
  4346. #endif
  4347. }
  4348. /**
  4349. * @brief Transpose the q31 matricex.
  4350. * @param[in] *src points to the input matrix.
  4351. * @param[out] *dst points to the output matrix.
  4352. * @param[in] row number of the matrix rows.
  4353. * @param[in] col number of the matrix columns.
  4354. */
  4355. static inline void hpm_dsp_mat_trans_q31(const q31_t *src, q31_t *dst, uint32_t row, uint32_t col)
  4356. {
  4357. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4358. #ifdef __zcc__
  4359. tpt_mat_trans_q31(dst, src, row, col);
  4360. #else
  4361. riscv_dsp_mat_trans_q31(src, dst, row, col);
  4362. #endif
  4363. #endif
  4364. }
  4365. /**
  4366. * @brief Transpose the u8 matricex.
  4367. * @param[in] *src points to the input matrix.
  4368. * @param[out] *dst points to the output matrix.
  4369. * @param[in] row number of the matrix rows.
  4370. * @param[in] col number of the matrix columns.
  4371. */
  4372. static inline void hpm_dsp_mat_trans_u8(const uint8_t *src, uint8_t *dst, uint32_t row, uint32_t col)
  4373. {
  4374. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4375. riscv_dsp_mat_trans_u8(src, dst, row, col);
  4376. #endif
  4377. }
  4378. /**
  4379. * @brief Transpose the q7 matrices.
  4380. * @param[in] src pointer of the input matrix
  4381. * @param[out] dst pointer of the output matrix
  4382. * @param[in] row number of rows in a matrix
  4383. * @param[in] col number of columns in a matrix
  4384. *
  4385. */
  4386. static inline void hpm_dsp_mat_trans_q7(const q7_t *src, q7_t *dst, uint32_t row, uint32_t col)
  4387. {
  4388. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4389. riscv_dsp_mat_trans_q7(src, dst, row, col);
  4390. #endif
  4391. }
  4392. /**
  4393. * @brief Outer production of two q31 matrices.
  4394. * @param[in] src1 pointer of the first input matrix with a size of size1*1
  4395. * @param[in] src2 pointer of the second input matrix with a size of 1*size2
  4396. * @param[out] dst pointer of the output matrix with a size of size1 * size2
  4397. * @param[in] size1 number of rows in the first input matrix.
  4398. * @param[in] size2 number of columns in the second input matrix.
  4399. *
  4400. *
  4401. * @b Note:
  4402. *
  4403. * This function multiplies a one-column matrix with size1 rows, src1[size1, 1], with a
  4404. * one-row matrix with size2 columns, src2[1, size2], and stores the result into a matrix
  4405. * with size1 rows and size2 columns, dst[size1, size2]. It achieves better efficiency for
  4406. * vector-wise matrix multiplication than for regular matrix multiplication.
  4407. *
  4408. * @b Example
  4409. * <pre>
  4410. * The following equation shows the outer product of two matrices and its result.
  4411. *
  4412. *
  4413. * Its code example is as follows:
  4414. *
  4415. * \#define Arow 3
  4416. * \#define Bcol 2
  4417. * q31_t src1[Arow] = {0x200000, 0x100000, 0x50000};
  4418. * q31_t src2[Bcol] = {0x10000, 0x30000};
  4419. * q31_t dst[Arow * Bcol];
  4420. * hpm_dsp_mat_oprod_q31 (src1, src2, dst, Arow, Bcol);
  4421. * </pre>
  4422. */
  4423. static inline void hpm_dsp_mat_oprod_q31(const q31_t * src1, const q31_t * src2,
  4424. q31_t * dst, uint32_t size1, uint32_t size2)
  4425. {
  4426. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4427. #ifdef __zcc__
  4428. tpt_mat_oprod_q31(dst, src1, src2, size1, size2);
  4429. #else
  4430. riscv_dsp_mat_oprod_q31(src1, src2, dst, size1, size2);
  4431. #endif
  4432. #endif
  4433. }
  4434. /**
  4435. * @brief Matrix multiply vector for f32 formats
  4436. * @param[in] src1 pointer of the input matrix
  4437. * @param[in] src2 pointer of the input vector
  4438. * @param[out] dst pointer of the output vector
  4439. * @param[in] row number of rows in the matrix
  4440. * @param[in] col number of columns in the matrix and the elements size of vector
  4441. *
  4442. *
  4443. * @b Example
  4444. * <pre>
  4445. *
  4446. * \#define Arow 2
  4447. * \#define Acol 3
  4448. * float32_t src1[Arow * Acol] = {0.1, -0.1, 0.1, 0.2, -0.2, 0.3};
  4449. * float32_t src2[Acol] = {0.2, -0.1, -0.7};
  4450. * float32_t dst[Arow];
  4451. * hpm_dsp_mat_mul_mxv_f32 (src1, src2, dst, Arow, Acol);
  4452. *
  4453. * This example also serves as a reference for examples of Q31, Q15 or Q7 functions.
  4454. * </pre>
  4455. */
  4456. static inline void hpm_dsp_mat_mul_mxv_f32(const float32_t *src1, const float32_t *src2,
  4457. float32_t *dst, uint32_t row, uint32_t col)
  4458. {
  4459. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4460. #ifdef __zcc__
  4461. tpt_mat_mul_mxv_f32(dst, src1, src2, row, col);
  4462. #else
  4463. riscv_dsp_mat_mul_mxv_f32(src1, src2, dst, row, col);
  4464. #endif
  4465. #endif
  4466. }
  4467. /**
  4468. * @brief Matrix multiply vector for q15 formats
  4469. * @param[in] src1 pointer of the input matrix
  4470. * @param[in] src2 pointer of the input vector
  4471. * @param[out] dst pointer of the output vector
  4472. * @param[in] row number of rows in the matrix
  4473. * @param[in] col number of columns in the matrix and the elements size of vector
  4474. *
  4475. */
  4476. static inline void hpm_dsp_mat_mul_mxv_q15(const q15_t *src1, const q15_t *src2,
  4477. q15_t *dst, uint32_t row, uint32_t col)
  4478. {
  4479. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4480. #ifdef __zcc__
  4481. tpt_mat_mul_mxv_q15(dst, src1, src2, row, col);
  4482. #else
  4483. riscv_dsp_mat_mul_mxv_q15(src1, src2, dst, row, col);
  4484. #endif
  4485. #endif
  4486. }
  4487. /**
  4488. * @brief Matrix multiply vector for q31 formats
  4489. * @param[in] src1 pointer of the input matrix
  4490. * @param[in] src2 pointer of the input vector
  4491. * @param[out] dst pointer of the output vector
  4492. * @param[in] row number of rows in the matrix
  4493. * @param[in] col number of columns in the matrix and the elements size of vector
  4494. *
  4495. */
  4496. static inline void hpm_dsp_mat_mul_mxv_q31(const q31_t *src1, const q31_t *src2,
  4497. q31_t *dst, uint32_t row, uint32_t col)
  4498. {
  4499. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4500. #ifdef __zcc__
  4501. tpt_mat_mul_mxv_q31(dst, src1, src2, row, col);
  4502. #else
  4503. riscv_dsp_mat_mul_mxv_q31(src1, src2, dst, row, col);
  4504. #endif
  4505. #endif
  4506. }
  4507. /**
  4508. * @brief Matrix multiply vector for q7 formats
  4509. * @param[in] src1 pointer of the input matrix
  4510. * @param[in] src2 pointer of the input vector
  4511. * @param[out] dst pointer of the output vector
  4512. * @param[in] row number of rows in the matrix
  4513. * @param[in] col number of columns in the matrix and the elements size of vector
  4514. *
  4515. */
  4516. static inline void hpm_dsp_mat_mul_mxv_q7(const q7_t *src1, const q7_t *src2,
  4517. q7_t *dst, uint32_t row, uint32_t col)
  4518. {
  4519. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4520. #ifdef __zcc__
  4521. tpt_mat_mul_mxv_q7(dst, src1, src2, row, col);
  4522. #else
  4523. riscv_dsp_mat_mul_mxv_q7(src1, src2, dst, row, col);
  4524. #endif
  4525. #endif
  4526. }
  4527. #endif
  4528. #endif
  4529. /**
  4530. * @}
  4531. *
  4532. */
  4533. #ifdef HPM_MATH_DSP_SVM
  4534. /**
  4535. * @defgroup svm DSP SVM Functions
  4536. * @ingroup hpmmath
  4537. * @{
  4538. */
  4539. #ifdef HPM_EN_MATH_DSP_LIB
  4540. #ifdef __zcc__
  4541. #include "tpt_math.h"
  4542. #endif
  4543. #include "riscv_dsp_svm_math.h"
  4544. /**
  4545. * @brief SVM linear prediction
  4546. * @param[in] instance Pointer to an instance of the linear SVM structure.
  4547. * @param[in] src Pointer to input vector
  4548. * @param[out] result Decision value
  4549. */
  4550. static inline void hpm_dsp_svm_linear_est_f32(const riscv_dsp_svm_linear_f32_t *instance, const float32_t *src, int32_t *result)
  4551. {
  4552. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4553. riscv_dsp_svm_linear_est_f32(instance, src, result);
  4554. #endif
  4555. }
  4556. /**
  4557. * @brief SVM Sigmoid prediction
  4558. * @param[in] instance Pointer to an instance of the linear SVM structure.
  4559. * @param[in] src Pointer to input vector
  4560. * @param[out] result Decision value
  4561. */
  4562. static inline void hpm_dsp_svm_sigmoid_est_f32(const riscv_dsp_svm_sigmoid_f32_t *instance, const float32_t *src, int32_t *result)
  4563. {
  4564. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4565. riscv_dsp_svm_sigmoid_est_f32(instance, src, result);
  4566. #endif
  4567. }
  4568. /**
  4569. * @brief SVM rbf prediction
  4570. * @param[in] instance Pointer to an instance of the linear SVM structure.
  4571. * @param[in] src Pointer to input vector
  4572. * @param[out] result Decision value
  4573. */
  4574. static inline void hpm_dsp_svm_rbf_est_f32(const riscv_dsp_svm_rbf_f32_t *instance, const float32_t *src, int32_t *result)
  4575. {
  4576. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4577. riscv_dsp_svm_rbf_est_f32(instance, src, result);
  4578. #endif
  4579. }
  4580. /**
  4581. * @brief SVM polynomial prediction
  4582. * @param[in] instance Pointer to an instance of the linear SVM structure.
  4583. * @param[in] src Pointer to input vector
  4584. * @param[out] result Decision value
  4585. */
  4586. static inline void hpm_dsp_svm_poly_est_f32(const riscv_dsp_svm_poly_f32_t *instance, const float32_t *src, int32_t *result)
  4587. {
  4588. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4589. riscv_dsp_svm_poly_est_f32(instance, src, result);
  4590. #endif
  4591. }
  4592. #endif
  4593. #endif
  4594. /**
  4595. * @}
  4596. *
  4597. */
  4598. #ifdef HPM_MATH_DSP_TRANSFORM
  4599. /**
  4600. * @defgroup transform DSP Transform Functions
  4601. * @ingroup hpmmath
  4602. * @{
  4603. */
  4604. #ifdef HPM_EN_MATH_DSP_LIB
  4605. #ifdef __zcc__
  4606. #include "tpt_math.h"
  4607. #endif
  4608. #include "riscv_dsp_transform_math.h"
  4609. /**
  4610. * @brief cfft_rd2 of f32 vectors.
  4611. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4612. * output will be stored in the input vector.
  4613. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4614. * @return 0 success; -1 failure
  4615. *
  4616. * @b Example
  4617. * <pre>
  4618. * Given 128 samples (that is, FFT_LOGN = 7), the example of floating-point Radix-2 CFFT and
  4619. * CIFFT is as follows:
  4620. * \#define FFT_LOGN 7
  4621. * float32_t src[2* (1 << FFT_LOGN)] = {};
  4622. * int32_t ret;
  4623. * ret = hpm_dsp_cfft_rd2_f32(src, FFT_LOGN);
  4624. * if (ret == 0)
  4625. * Success
  4626. * Else
  4627. * Fail
  4628. * ret = hpm_dsp_cifft_rd2_f32(src, FFT_LOGN);
  4629. * if (ret == 0)
  4630. * Success
  4631. * Else
  4632. * Fail
  4633. *
  4634. * This example also serves as a reference for examples of Q31 and Q15 Radix-2 CFFT and
  4635. * CIFFT functions.
  4636. * </pre>
  4637. */
  4638. static inline int32_t hpm_dsp_cfft_rd2_f32(float32_t *src, uint32_t m)
  4639. {
  4640. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4641. #ifdef __zcc__
  4642. return tpt_cfft_f32(src, m, false);
  4643. #else
  4644. return riscv_dsp_cfft_rd2_f32(src, m);
  4645. #endif
  4646. #endif
  4647. }
  4648. /**
  4649. * @brief cifft_rd2 of f32 vectors.
  4650. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4651. * output will be stored in the input vector.
  4652. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4653. * @return 0 success; -1 failure
  4654. */
  4655. static inline int32_t hpm_dsp_cifft_rd2_f32(float32_t *src, uint32_t m)
  4656. {
  4657. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4658. #ifdef __zcc__
  4659. return tpt_cfft_f32(src, m, true);
  4660. #else
  4661. return riscv_dsp_cifft_rd2_f32(src, m);
  4662. #endif
  4663. #endif
  4664. }
  4665. /**
  4666. * @brief cfft_rd2 of q15 vectors.
  4667. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4668. * output will be stored in the input vector.
  4669. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4670. * @return 0 success; -1 failure
  4671. *
  4672. * @b Note:
  4673. *
  4674. * The input and output formats are listed below. To satisfy the input format corresponding to
  4675. * your input size, you may need to perform an arithmetic shift operation before calling this
  4676. * function.
  4677. */
  4678. static inline int32_t hpm_dsp_cfft_rd2_q15(q15_t *src, uint32_t m)
  4679. {
  4680. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4681. #ifdef __zcc__
  4682. return tpt_cfft_q15(src, m, false);
  4683. #else
  4684. return riscv_dsp_cfft_rd2_q15(src, m);
  4685. #endif
  4686. #endif
  4687. }
  4688. /**
  4689. * @brief cifft_rd2 of q15 vectors.
  4690. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4691. * output will be stored in the input vector.
  4692. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4693. * @return 0 success; -1 failure
  4694. *
  4695. * @b Note:
  4696. *
  4697. * The input and output formats are listed below. To satisfy the input format corresponding to
  4698. * your input size, you may need to perform an arithmetic shift operation before calling this
  4699. * function.
  4700. */
  4701. static inline int32_t hpm_dsp_cifft_rd2_q15(q15_t *src, uint32_t m)
  4702. {
  4703. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4704. #ifdef __zcc__
  4705. return tpt_cfft_q15(src, m, true);
  4706. #else
  4707. return riscv_dsp_cifft_rd2_q15(src, m);
  4708. #endif
  4709. #endif
  4710. }
  4711. /**
  4712. * @brief cfft_rd2 of q31 vectors.
  4713. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4714. * output will be stored in the input vector.
  4715. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4716. * @return 0 success; -1 failure
  4717. *
  4718. * @b Note:
  4719. *
  4720. * The input and output formats are listed below. To satisfy the input format corresponding to
  4721. * your input size, you may need to perform an arithmetic shift operation before calling this
  4722. * function.
  4723. */
  4724. static inline int32_t hpm_dsp_cfft_rd2_q31(q31_t *src, uint32_t m)
  4725. {
  4726. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4727. #ifdef __zcc__
  4728. return tpt_cfft_q31(src, m, false);
  4729. #else
  4730. return riscv_dsp_cfft_rd2_q31(src, m);
  4731. #endif
  4732. #endif
  4733. }
  4734. /**
  4735. * @brief cfft_rd2 of q31 vectors.
  4736. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4737. * output will be stored in the input vector.
  4738. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4739. * @return 0 success; -1 failure
  4740. *
  4741. * @b Note:
  4742. *
  4743. * The input and output formats are listed below. To satisfy the input format corresponding to
  4744. * your input size, you may need to perform an arithmetic shift operation before calling this
  4745. * function.
  4746. */
  4747. static inline int32_t hpm_dsp_cifft_rd2_q31(q31_t *src, uint32_t m)
  4748. {
  4749. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4750. #ifdef __zcc__
  4751. return tpt_cfft_q31(src, m, true);
  4752. #else
  4753. return riscv_dsp_cifft_rd2_q31(src, m);
  4754. #endif
  4755. #endif
  4756. }
  4757. /**
  4758. * @brief cfft_rd4 of f32 vectors.
  4759. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4760. * output will be stored in the input vector.
  4761. * @param[in] m base 2 logarithm value of the sample number and it can be set as 4, 6, 8 or 10
  4762. * @return 0 success; -1 failure
  4763. *
  4764. * @b Example
  4765. * <pre>
  4766. * Given 256 samples (that is, FFT_LOGN = 8), the example of floating-point Radix-4 CFFT and
  4767. * CIFFT is as follows:
  4768. * \#define FFT_LOGN 8
  4769. * float32_t src[2* (1 << FFT_LOGN)] = {};
  4770. * int32_t ret;
  4771. * ret = hpm_dsp_cfft_rd4_f32(src, FFT_LOGN);
  4772. * if (ret == 0)
  4773. * Success
  4774. * Else
  4775. * Fail
  4776. * ret = riscv_dsp_cifft_rd4_f32(src, FFT_LOGN);
  4777. * if (ret == 0)
  4778. * Success
  4779. * Else
  4780. * Fail
  4781. *
  4782. * This example also serves as a reference for examples of Q31 or Q15 Radix-4 CFFT and
  4783. * CIFFT functions.
  4784. * </pre>
  4785. */
  4786. static inline int32_t hpm_dsp_cfft_rd4_f32(float32_t *src, uint32_t m)
  4787. {
  4788. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4789. #ifdef __zcc__
  4790. return tpt_cfft_f32(src, m, false);
  4791. #else
  4792. return riscv_dsp_cfft_rd4_f32(src, m);
  4793. #endif
  4794. #endif
  4795. }
  4796. /**
  4797. * @brief cifft_rd4 of f32 vectors.
  4798. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4799. * output will be stored in the input vector.
  4800. * @param[in] m base 2 logarithm value of the sample number and it can be set as 4, 6, 8 or 10
  4801. * @return 0 success; -1 failure
  4802. */
  4803. static inline int32_t hpm_dsp_cifft_rd4_f32(float32_t *src, uint32_t m)
  4804. {
  4805. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4806. #ifdef __zcc__
  4807. return tpt_cfft_f32(src, m, true);
  4808. #else
  4809. return riscv_dsp_cifft_rd4_f32(src, m);
  4810. #endif
  4811. #endif
  4812. }
  4813. /**
  4814. * @brief cfft_rd4 of q15 vectors.
  4815. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4816. * output will be stored in the input vector.
  4817. * @param[in] m base 2 logarithm value of the sample number and it can be set as 4, 6, 8 or 10
  4818. * @return 0 success; -1 failure
  4819. *
  4820. * @b Note:
  4821. *
  4822. * The input and output formats are listed below. To satisfy the input format corresponding to
  4823. * your input size, you may need to perform an arithmetic shift operation before calling this
  4824. * function.
  4825. */
  4826. static inline int32_t hpm_dsp_cfft_rd4_q15(q15_t *src, uint32_t m)
  4827. {
  4828. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4829. #ifdef __zcc__
  4830. return tpt_cfft_q15(src, m, false);
  4831. #else
  4832. return riscv_dsp_cfft_rd4_q15(src, m);
  4833. #endif
  4834. #endif
  4835. }
  4836. /**
  4837. * @brief cifft_rd4 of q15 vectors.
  4838. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4839. * output will be stored in the input vector.
  4840. * @param[in] m base 2 logarithm value of the sample number and it can be set as 4, 6, 8 or 10
  4841. * @return 0 success; -1 failure
  4842. *
  4843. * @b Note:
  4844. *
  4845. * The input and output formats are listed below. To satisfy the input format corresponding to
  4846. * your input size, you may need to perform an arithmetic shift operation before calling this
  4847. * function.
  4848. */
  4849. static inline int32_t hpm_dsp_cifft_rd4_q15(q15_t *src, uint32_t m)
  4850. {
  4851. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4852. #ifdef __zcc__
  4853. return tpt_cfft_q15(src, m, true);
  4854. #else
  4855. return riscv_dsp_cifft_rd4_q15(src, m);
  4856. #endif
  4857. #endif
  4858. }
  4859. /**
  4860. * @brief cfft_rd4 of q31 vectors.
  4861. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4862. * output will be stored in the input vector.
  4863. * @param[in] m base 2 logarithm value of the sample number and it can be set as 4, 6, 8 or 10
  4864. * @return 0 success; -1 failure
  4865. *
  4866. * @b Note:
  4867. *
  4868. * The input and output formats are listed below. To satisfy the input format corresponding to
  4869. * your input size, you may need to perform an arithmetic shift operation before calling this
  4870. * function.
  4871. */
  4872. static inline int32_t hpm_dsp_cfft_rd4_q31(q31_t *src, uint32_t m)
  4873. {
  4874. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4875. #ifdef __zcc__
  4876. return tpt_cfft_q31(src, m, false);
  4877. #else
  4878. return riscv_dsp_cfft_rd4_q31(src, m);
  4879. #endif
  4880. #endif
  4881. }
  4882. /**
  4883. * @brief cifft_rd4 of q31 vectors.
  4884. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4885. * output will be stored in the input vector.
  4886. * @param[in] m base 2 logarithm value of the sample number and it can be set as 4, 6, 8 or 10
  4887. * @return 0 success; -1 failure
  4888. *
  4889. * @b Note:
  4890. *
  4891. * The input and output formats are listed below. To satisfy the input format corresponding to
  4892. * your input size, you may need to perform an arithmetic shift operation before calling this
  4893. * function.
  4894. */
  4895. static inline int32_t hpm_dsp_cifft_rd4_q31(q31_t *src, uint32_t m)
  4896. {
  4897. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4898. #ifdef __zcc__
  4899. return tpt_cfft_q31(src, m, true);
  4900. #else
  4901. return riscv_dsp_cifft_rd4_q31(src, m);
  4902. #endif
  4903. #endif
  4904. }
  4905. /**
  4906. * @brief cfft of f32 vectors.
  4907. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4908. * output will be stored in the input vector.
  4909. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4910. *
  4911. * @b Example
  4912. * <pre>
  4913. * Given 128 samples (that is, FFT_LOGN = 7), the example of floating-point CFFT and
  4914. * CIFFT is as follows:
  4915. * \#define FFT_LOGN 7
  4916. * float32_t src[2* (1 << FFT_LOGN)] = {};
  4917. * int32_t ret;
  4918. * hpm_dsp_cfft_f32(src, FFT_LOGN);
  4919. * hpm_dsp_cifft_f32(src, FFT_LOGN);
  4920. *
  4921. * This example also serves as a reference for examples of F16, F64, Q31 and Q15 CFFT and
  4922. * CIFFT functions.
  4923. * </pre>
  4924. */
  4925. static inline void hpm_dsp_cfft_f32(float32_t *src, uint32_t m)
  4926. {
  4927. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4928. #ifdef __zcc__
  4929. tpt_cfft_f32(src, m, false);
  4930. #else
  4931. riscv_dsp_cfft_f32(src, m);
  4932. #endif
  4933. #endif
  4934. }
  4935. /**
  4936. * @brief cfft of f64 vectors.
  4937. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4938. * output will be stored in the input vector.
  4939. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4940. */
  4941. static inline void hpm_dsp_cfft_f64(float64_t *src, uint32_t m)
  4942. {
  4943. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4944. #ifdef __zcc__
  4945. tpt_cfft_f64(src, m, false);
  4946. #else
  4947. riscv_dsp_cfft_f64(src, m);
  4948. #endif
  4949. #endif
  4950. }
  4951. /**
  4952. * @brief cifft of f32 vectors.
  4953. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4954. * output will be stored in the input vector.
  4955. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4956. */
  4957. static inline void hpm_dsp_cifft_f32(float32_t *src, uint32_t m)
  4958. {
  4959. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4960. #ifdef __zcc__
  4961. tpt_cfft_f32(src, m, true);
  4962. #else
  4963. riscv_dsp_cifft_f32(src, m);
  4964. #endif
  4965. #endif
  4966. }
  4967. /**
  4968. * @brief cifft of f64 vectors.
  4969. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4970. * output will be stored in the input vector.
  4971. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4972. */
  4973. static inline void hpm_dsp_cifft_f64(float64_t *src, uint32_t m)
  4974. {
  4975. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4976. #ifdef __zcc__
  4977. tpt_cfft_f64(src, m, true);
  4978. #else
  4979. riscv_dsp_cifft_f64(src, m);
  4980. #endif
  4981. #endif
  4982. }
  4983. /**
  4984. * @brief cfft of q15 vectors.
  4985. * @param[in, out] src pointer of the input vector. After the function is executed, the
  4986. * output will be stored in the input vector.
  4987. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  4988. *
  4989. * @b Note:
  4990. *
  4991. * The input and output formats are listed below. To satisfy the input format corresponding to
  4992. * your input size, you may need to perform an arithmetic shift operation before calling this
  4993. * function.
  4994. */
  4995. static inline void hpm_dsp_cfft_q15(q15_t *src, uint32_t m)
  4996. {
  4997. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  4998. #ifdef __zcc__
  4999. tpt_cfft_q15(src, m, false);
  5000. #else
  5001. riscv_dsp_cfft_q15(src, m);
  5002. #endif
  5003. #endif
  5004. }
  5005. /**
  5006. * @brief cifft of q15 vectors.
  5007. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5008. * output will be stored in the input vector.
  5009. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  5010. *
  5011. * @b Note:
  5012. *
  5013. * The input and output formats are listed below. To satisfy the input format corresponding to
  5014. * your input size, you may need to perform an arithmetic shift operation before calling this
  5015. * function.
  5016. */
  5017. static inline void hpm_dsp_cifft_q15(q15_t *src, uint32_t m)
  5018. {
  5019. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5020. #ifdef __zcc__
  5021. tpt_cfft_q15(src, m, true);
  5022. #else
  5023. riscv_dsp_cifft_q15(src, m);
  5024. #endif
  5025. #endif
  5026. }
  5027. /**
  5028. * @brief cfft of q31 vectors.
  5029. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5030. * output will be stored in the input vector.
  5031. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  5032. *
  5033. * @b Note:
  5034. *
  5035. * The input and output formats are listed below. To satisfy the input format corresponding to
  5036. * your input size, you may need to perform an arithmetic shift operation before calling this
  5037. * function.
  5038. */
  5039. static inline void hpm_dsp_cfft_q31(q31_t *src, uint32_t m)
  5040. {
  5041. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5042. #ifdef __zcc__
  5043. tpt_cfft_q31(src, m, false);
  5044. #else
  5045. riscv_dsp_cfft_q31(src, m);
  5046. #endif
  5047. #endif
  5048. }
  5049. /**
  5050. * @brief cifft of q31 vectors.
  5051. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5052. * output will be stored in the input vector.
  5053. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 13
  5054. *
  5055. * @b Note:
  5056. *
  5057. * The input and output formats are listed below. To satisfy the input format corresponding to
  5058. * your input size, you may need to perform an arithmetic shift operation before calling this
  5059. * function.
  5060. */
  5061. static inline void hpm_dsp_cifft_q31(q31_t *src, uint32_t m)
  5062. {
  5063. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5064. #ifdef __zcc__
  5065. tpt_cfft_q31(src, m, true);
  5066. #else
  5067. riscv_dsp_cifft_q31(src, m);
  5068. #endif
  5069. #endif
  5070. }
  5071. /**
  5072. * @brief rfft of f32 vectors.
  5073. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5074. * output will be stored in the input vector.
  5075. * @param[in] m base 2 logarithm value of the sample number and it can be set from 4 to 14
  5076. * @return 0 success; -1 failure
  5077. *
  5078. * @b Example
  5079. * <pre>
  5080. * Given 128 samples (that is, FFT_LOGN = 7), the example of floating-point RFFT and RIFFT
  5081. * is as follows:
  5082. * \#define FFT_LOGN 7
  5083. * float32_t src[(1 << FFT_LOGN)] = {};
  5084. * int32_t ret;
  5085. * ret = hpm_dsp_rfft_f32(src, FFT_LOGN);
  5086. * if (ret == 0)
  5087. * Success
  5088. * else
  5089. * Fail
  5090. * ret = riscv_dsp_rifft_f32(src, FFT_LOGN);
  5091. * if (ret == 0)
  5092. * Success
  5093. * else
  5094. * Fail
  5095. *
  5096. * This example also serves as a reference for examples of Q31 or Q15 RFFT and RIFFT
  5097. * functions.
  5098. * </pre>
  5099. */
  5100. static inline int32_t hpm_dsp_rfft_f32(float32_t *src, uint32_t m)
  5101. {
  5102. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5103. #ifdef __zcc__
  5104. return tpt_rfft_f32(src, src, m, false);
  5105. #else
  5106. return riscv_dsp_rfft_f32(src, m);
  5107. #endif
  5108. #endif
  5109. }
  5110. /**
  5111. * @brief rfft of f64 vectors.
  5112. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5113. * output will be stored in the input vector.
  5114. * @param[in] m base 2 logarithm value of the sample number and it can be set from 4 to 14
  5115. * @return 0 success; -1 failure
  5116. */
  5117. static inline int32_t hpm_dsp_rfft_f64(float64_t *src, uint32_t m)
  5118. {
  5119. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5120. return riscv_dsp_rfft_f64(src, m);
  5121. #endif
  5122. }
  5123. /**
  5124. * @brief rifft of f32 vectors.
  5125. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5126. * output will be stored in the input vector.
  5127. * @param[in] m base 2 logarithm value of the sample number and it can be set from 4 to 14
  5128. * @return 0 success; -1 failure
  5129. */
  5130. static inline int32_t hpm_dsp_rifft_f32(float32_t *src, uint32_t m)
  5131. {
  5132. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5133. return riscv_dsp_rifft_f32(src, m);
  5134. #endif
  5135. }
  5136. /**
  5137. * @brief rifft of f64 vectors.
  5138. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5139. * output will be stored in the input vector.
  5140. * @param[in] m base 2 logarithm value of the sample number and it can be set from 4 to 14
  5141. * @return 0 success; -1 failure
  5142. */
  5143. static inline int32_t hpm_dsp_rifft_f64(float64_t *src, uint32_t m)
  5144. {
  5145. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5146. return riscv_dsp_rifft_f64(src, m);
  5147. #endif
  5148. }
  5149. /**
  5150. * @brief rfft of q15 vectors.
  5151. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5152. * output will be stored in the input vector.
  5153. * @param[in] m base 2 logarithm value of the sample number and it can be set from 4 to 14
  5154. * @return 0 success; -1 failure
  5155. *
  5156. * @b Note:
  5157. *
  5158. * The input and output formats are listed below. To satisfy the input format corresponding to
  5159. * your input size, you may need to perform an arithmetic shift operation before calling this
  5160. * function.
  5161. */
  5162. static inline int32_t hpm_dsp_rfft_q15(q15_t *src, uint32_t m)
  5163. {
  5164. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5165. return riscv_dsp_rfft_q15(src, m);
  5166. #endif
  5167. }
  5168. /**
  5169. * @brief rifft of q15 vectors.
  5170. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5171. * output will be stored in the input vector.
  5172. * @param[in] m base 2 logarithm value of the sample number and it can be set from 4 to 14
  5173. * @return 0 success; -1 failure
  5174. *
  5175. * @b Note:
  5176. *
  5177. * The input and output formats are listed below. To satisfy the input format corresponding to
  5178. * your input size, you may need to perform an arithmetic shift operation before calling this
  5179. * function.
  5180. */
  5181. static inline int32_t hpm_dsp_rifft_q15(q15_t *src, uint32_t m)
  5182. {
  5183. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5184. return riscv_dsp_rifft_q15(src, m);
  5185. #endif
  5186. }
  5187. /**
  5188. * @brief rfft of q31 vectors.
  5189. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5190. * output will be stored in the input vector.
  5191. * @param[in] m base 2 logarithm value of the sample number and it can be set from 4 to 14
  5192. * @return 0 success; -1 failure
  5193. *
  5194. * @b Note:
  5195. *
  5196. * The input and output formats are listed below. To satisfy the input format corresponding to
  5197. * your input size, you may need to perform an arithmetic shift operation before calling this
  5198. * function.
  5199. */
  5200. static inline int32_t hpm_dsp_rfft_q31(q31_t *src, uint32_t m)
  5201. {
  5202. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5203. return riscv_dsp_rfft_q31(src, m);
  5204. #endif
  5205. }
  5206. /**
  5207. * @brief rifft of q31 vectors.
  5208. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5209. * output will be stored in the input vector.
  5210. * @param[in] m base 2 logarithm value of the sample number and it can be set from 4 to 14
  5211. * @return 0 success; -1 failure
  5212. *
  5213. * @b Note:
  5214. *
  5215. * The input and output formats are listed below. To satisfy the input format corresponding to
  5216. * your input size, you may need to perform an arithmetic shift operation before calling this
  5217. * function.
  5218. */
  5219. static inline int32_t hpm_dsp_rifft_q31(q31_t *src, uint32_t m)
  5220. {
  5221. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5222. return riscv_dsp_rifft_q31(src, m);
  5223. #endif
  5224. }
  5225. /**
  5226. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5227. * output will be stored in the input vector.
  5228. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 8
  5229. *
  5230. *
  5231. * @b Example
  5232. * <pre>
  5233. * Given 256 samples (that is, FFT_LOGN = 8), the example of floating-point (DCT) type II and
  5234. * IDCT is as follows:
  5235. * \#define FFT_LOGN 8
  5236. * float32_t src[(1 << FFT_LOGN)] = {};
  5237. * riscv_dsp_dct_f32(src, FFT_LOGN);
  5238. * riscv_dsp_idct_f32(src, FFT_LOGN);
  5239. * This example also serves as a reference for examples of Q31 or Q15 DCT type II and IDCT
  5240. * functions.
  5241. * </pre>
  5242. */
  5243. static inline void hpm_dsp_dct_f32(float32_t *src, uint32_t m)
  5244. {
  5245. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5246. riscv_dsp_dct_f32(src, m);
  5247. #endif
  5248. }
  5249. /**
  5250. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5251. * output will be stored in the input vector.
  5252. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 8
  5253. *
  5254. */
  5255. static inline void hpm_dsp_idct_f32(float32_t *src, uint32_t m)
  5256. {
  5257. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5258. riscv_dsp_idct_f32(src, m);
  5259. #endif
  5260. }
  5261. /**
  5262. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5263. * output will be stored in the input vector.
  5264. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 8
  5265. *
  5266. *
  5267. * @b Note:
  5268. *
  5269. * The input and output formats are listed below. To satisfy the input format corresponding to
  5270. * your input size, you may need to perform an arithmetic shift operation before calling this
  5271. * function.
  5272. */
  5273. static inline void hpm_dsp_dct_q15(q15_t *src, uint32_t m)
  5274. {
  5275. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5276. riscv_dsp_dct_q15(src, m);
  5277. #endif
  5278. }
  5279. /**
  5280. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5281. * output will be stored in the input vector.
  5282. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 8
  5283. *
  5284. *
  5285. * @b Note:
  5286. *
  5287. * The input and output formats are listed below. To satisfy the input format corresponding to
  5288. * your input size, you may need to perform an arithmetic shift operation before calling this
  5289. * function.
  5290. */
  5291. static inline void hpm_dsp_idct_q15(q15_t *src, uint32_t m)
  5292. {
  5293. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5294. riscv_dsp_idct_q15(src, m);
  5295. #endif
  5296. }
  5297. /**
  5298. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5299. * output will be stored in the input vector.
  5300. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 8
  5301. *
  5302. *
  5303. * @b Note:
  5304. *
  5305. * The input and output formats are listed below. To satisfy the input format corresponding to
  5306. * your input size, you may need to perform an arithmetic shift operation before calling this
  5307. * function.
  5308. */
  5309. static inline void hpm_dsp_dct_q31(q31_t *src, uint32_t m)
  5310. {
  5311. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5312. riscv_dsp_dct_q31(src, m);
  5313. #endif
  5314. }
  5315. /**
  5316. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5317. * output will be stored in the input vector.
  5318. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 8
  5319. *
  5320. *
  5321. * @b Note:
  5322. *
  5323. * The input and output formats are listed below. To satisfy the input format corresponding to
  5324. * your input size, you may need to perform an arithmetic shift operation before calling this
  5325. * function.
  5326. */
  5327. static inline void hpm_dsp_idct_q31(q31_t *src, uint32_t m)
  5328. {
  5329. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5330. riscv_dsp_idct_q31(src, m);
  5331. #endif
  5332. }
  5333. /**
  5334. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5335. * output will be stored in the input vector.
  5336. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 7
  5337. *
  5338. *
  5339. * @b Example
  5340. * <pre>
  5341. * Given 128 samples (that is, FFT_LOGN = 7), the example of floating-point DCT or IDCT type
  5342. * IV transform is as follows:
  5343. * \#define FFT_LOGN 7
  5344. * float32_t src[(1 << FFT_LOGN)] = {};
  5345. * riscv_dsp_dct4_f32(src, FFT_LOGN);
  5346. * riscv_dsp_idct4_f32(src, FFT_LOGN);
  5347. * This example also serves as a reference for examples of Q31 or Q15 DCT type IV and IDCT
  5348. * functions.
  5349. * </pre>
  5350. */
  5351. static inline void hpm_dsp_dct4_f32(float32_t *src, uint32_t m)
  5352. {
  5353. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5354. riscv_dsp_dct4_f32(src, m);
  5355. #endif
  5356. }
  5357. /**
  5358. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5359. * output will be stored in the input vector.
  5360. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 7
  5361. *
  5362. */
  5363. static inline void hpm_dsp_idct4_f32(float32_t *src, uint32_t m)
  5364. {
  5365. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5366. riscv_dsp_idct4_f32(src, m);
  5367. #endif
  5368. }
  5369. /**
  5370. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5371. * output will be stored in the input vector.
  5372. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 7
  5373. *
  5374. *
  5375. * @b Note:
  5376. *
  5377. * The input and output formats are listed below. To satisfy the input format corresponding to
  5378. * your input size, you may need to perform an arithmetic shift operation before calling this
  5379. * function.
  5380. */
  5381. static inline void hpm_dsp_dct4_q15(q15_t *src, uint32_t m)
  5382. {
  5383. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5384. riscv_dsp_dct4_q15(src, m);
  5385. #endif
  5386. }
  5387. /**
  5388. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5389. * output will be stored in the input vector.
  5390. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 7
  5391. *
  5392. *
  5393. * @b Note:
  5394. *
  5395. * The input and output formats are listed below. To satisfy the input format corresponding to
  5396. * your input size, you may need to perform an arithmetic shift operation before calling this
  5397. * function.
  5398. */
  5399. static inline void hpm_dsp_idct4_q15(q15_t *src, uint32_t m)
  5400. {
  5401. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5402. riscv_dsp_idct4_q15(src, m);
  5403. #endif
  5404. }
  5405. /**
  5406. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5407. * output will be stored in the input vector.
  5408. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 7
  5409. *
  5410. *
  5411. * @b Note:
  5412. *
  5413. * The input and output formats are listed below. To satisfy the input format corresponding to
  5414. * your input size, you may need to perform an arithmetic shift operation before calling this
  5415. * function.
  5416. */
  5417. static inline void hpm_dsp_dct4_q31(q31_t *src, uint32_t m)
  5418. {
  5419. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5420. riscv_dsp_dct4_q31(src, m);
  5421. #endif
  5422. }
  5423. /**
  5424. * @param[in, out] src pointer of the input vector. After the function is executed, the
  5425. * output will be stored in the input vector.
  5426. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 7
  5427. *
  5428. *
  5429. * @b Note:
  5430. *
  5431. * The input and output formats are listed below. To satisfy the input format corresponding to
  5432. * your input size, you may need to perform an arithmetic shift operation before calling this
  5433. * function.
  5434. */
  5435. static inline void hpm_dsp_idct4_q31(q31_t *src, uint32_t m)
  5436. {
  5437. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5438. riscv_dsp_idct4_q31(src, m);
  5439. #endif
  5440. }
  5441. /**
  5442. * @brief Software implementation does not depend on any hardware
  5443. *
  5444. */
  /**
   * @brief Software complex FFT on float data (declaration only; the
   *        implementation is not part of this header).
   *
   * @param src in/out buffer. It requires double the space of the other FFT
   *            interfaces: 0-n holds the input data, n-2n is used as a working
   *            buffer, and the output data is written back to 0-n.
   * @param m   described originally as "2^n sampling points, including real and
   *            imaginary parts". NOTE(review): presumably the base 2 logarithm
   *            of the sample number, as for the other FFT wrappers - confirm
   *            against the implementation.
   */
  void hpm_software_cfft_float(float *src, uint32_t m);
  5452. #endif
  5453. #if defined(HPMSOC_HAS_HPMSDK_FFA) && defined(HPM_EN_MATH_DSP_LIB)
  5454. #include "hpm_ffa_drv.h"
  5455. #include "hpm_soc.h"
  5456. /**
  5457. * @brief The ffa module requires the user to pay attention to cache operations
  5458. *
  5459. */
  5460. /**
  5461. * @brief fft calculation using ffa hardware acceleration unit, q15 format
  5462. *
  5463. * @param[in,out] src pointer of the input vector. After the function is executed,
  5464. * the output will be stored in the input vector.
  5465. * The complex data in the input vector are arranged as [real, imaginary,real, imaginary..., real, imaginary].
  5466. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 9
  5467. */
  5468. static inline void hpm_ffa_cfft_q15(q15_t *src, uint32_t m)
  5469. {
  5470. fft_xfer_t xfer = { 0 };
  5471. xfer.num_points = 1 << m;
  5472. xfer.src = src;
  5473. xfer.dst = src;
  5474. xfer.is_ifft = false;
  5475. xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_Q15;
  5476. xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_Q15;
  5477. ffa_calculate_fft_blocking(HPM_FFA, &xfer);
  5478. }
  5479. /**
  5480. * @brief fft calculation using ffa hardware acceleration unit, q31 format
  5481. *
  5482. * @param[in,out] src pointer of the input vector. After the function is executed,
  5483. * the output will be stored in the input vector.
  5484. * The complex data in the input vector are arranged as [real, imaginary,real, imaginary..., real, imaginary].
  5485. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 9
  5486. */
  5487. static inline void hpm_ffa_cfft_q31(q31_t *src, uint32_t m)
  5488. {
  5489. fft_xfer_t xfer = { 0 };
  5490. xfer.num_points = 1 << m;
  5491. xfer.src = src;
  5492. xfer.dst = src;
  5493. xfer.is_ifft = false;
  5494. xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_Q31;
  5495. xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_Q31;
  5496. ffa_calculate_fft_blocking(HPM_FFA, &xfer);
  5497. }
  5498. #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
  5499. static inline void hpm_ffa_cfft_f32(float *src, uint32_t m)
  5500. {
  5501. fft_xfer_t xfer = { 0 };
  5502. xfer.num_points = 1 << m;
  5503. xfer.src = src;
  5504. xfer.dst = src;
  5505. xfer.is_ifft = false;
  5506. xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
  5507. xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
  5508. ffa_enable_fp_bias(HPM_FFA);
  5509. ffa_set_coef_max_index(HPM_FFA, 0);
  5510. ffa_set_output_max_index(HPM_FFA, 20);
  5511. ffa_set_input_max_index(HPM_FFA, 20 - m);
  5512. ffa_calculate_fft_blocking(HPM_FFA, &xfer);
  5513. }
  5514. #endif
  5515. /**
  5516. * @brief ifft calculation using ffa hardware acceleration unit, q15 format
  5517. *
  5518. * @param[in,out] src pointer of the input vector. After the function is executed,
  5519. * the output will be stored in the input vector.
  5520. * The complex data in the input vector are arranged as [real, imaginary,real, imaginary..., real, imaginary].
  5521. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 9
  5522. */
  5523. static inline void hpm_ffa_cifft_q15(q15_t *src, uint32_t m)
  5524. {
  5525. fft_xfer_t xfer = { 0 };
  5526. xfer.num_points = 1 << m;
  5527. xfer.src = src;
  5528. xfer.dst = src;
  5529. xfer.is_ifft = true;
  5530. xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_Q15;
  5531. xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_Q15;
  5532. ffa_calculate_fft_blocking(HPM_FFA, &xfer);
  5533. }
  5534. /**
  5535. * @brief ifft calculation using ffa hardware acceleration unit, q31 format
  5536. *
  5537. * @param[in,out] src pointer of the input vector. After the function is executed,
  5538. * the output will be stored in the input vector.
  5539. * The complex data in the input vector are arranged as [real, imaginary,real, imaginary..., real, imaginary].
  5540. * @param[in] m base 2 logarithm value of the sample number and it can be set from 3 to 9
  5541. */
  5542. static inline void hpm_ffa_cifft_q31(q31_t *src, uint32_t m)
  5543. {
  5544. fft_xfer_t xfer = { 0 };
  5545. xfer.num_points = 1 << m;
  5546. xfer.src = src;
  5547. xfer.dst = src;
  5548. xfer.is_ifft = true;
  5549. xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_Q31;
  5550. xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_Q31;
  5551. ffa_calculate_fft_blocking(HPM_FFA, &xfer);
  5552. }
  5553. #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
  5554. static inline void hpm_ffa_cifft_f32(float *src, uint32_t m)
  5555. {
  5556. fft_xfer_t xfer = { 0 };
  5557. xfer.num_points = 1 << m;
  5558. xfer.src = src;
  5559. xfer.dst = src;
  5560. xfer.is_ifft = true;
  5561. xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
  5562. xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
  5563. ffa_enable_fp_bias(HPM_FFA);
  5564. ffa_set_coef_max_index(HPM_FFA, 0x0);
  5565. ffa_set_output_max_index(HPM_FFA, 10);
  5566. ffa_set_input_max_index(HPM_FFA, 20);
  5567. ffa_calculate_fft_blocking(HPM_FFA, &xfer);
  5568. }
  5569. #endif
  5570. #endif
  5571. #endif
  5572. /**
  5573. * @}
  5574. *
  5575. */
  5576. #ifdef HPM_MATH_DSP_UTILS
  5577. /**
  5578. * @defgroup utils DSP Utils Functions
   * This set of functions implements sine, cosine, arctangent, and square root.
  5580. * There are separate functions for Q15, Q31, and floating-point data.
  5581. * @ingroup hpmmath
  5582. * @{
  5583. */
  5584. #ifdef HPM_EN_MATH_DSP_LIB
  5585. #ifdef __zcc__
  5586. #include <tpt_math.h>
  5587. #endif
  5588. #include "riscv_dsp_utils_math.h"
  5589. // Cosine and Sine
  5590. static inline float32_t hpm_dsp_cos_f32(float32_t src)
  5591. {
  5592. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5593. return riscv_dsp_cos_f32(src);
  5594. #endif
  5595. }
  5596. static inline q31_t hpm_dsp_cos_q31(q31_t src)
  5597. {
  5598. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5599. return riscv_dsp_cos_q31(src);
  5600. #endif
  5601. }
  5602. static inline q15_t hpm_dsp_cos_q15(q15_t src)
  5603. {
  5604. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5605. return riscv_dsp_cos_q15(src);
  5606. #endif
  5607. }
  5608. static inline float32_t hpm_dsp_sin_f32(float32_t src)
  5609. {
  5610. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5611. return riscv_dsp_sin_f32(src);
  5612. #endif
  5613. }
  5614. #if defined (__riscv_zfh)
  5615. /**
  5616. * @param[in] src input value (radian)
  5617. * @return Sine value of the input
  5618. */
  5619. static inline float16_t hpm_dsp_sin_f16(float16_t src)
  5620. {
  5621. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5622. return riscv_dsp_sin_f16(src);
  5623. #endif
  5624. }
  5625. #endif
  5626. static inline q31_t hpm_dsp_sin_q31(q31_t src)
  5627. {
  5628. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5629. return riscv_dsp_sin_q31(src);
  5630. #endif
  5631. }
  5632. static inline q15_t hpm_dsp_sin_q15(q15_t src)
  5633. {
  5634. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5635. return riscv_dsp_sin_q15(src);
  5636. #endif
  5637. }
  5638. // Arc tangent
  5639. static inline float32_t hpm_dsp_atan_f32(float32_t src)
  5640. {
  5641. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5642. return riscv_dsp_atan_f32(src);
  5643. #endif
  5644. }
  5645. static inline q31_t hpm_dsp_atan_q31(q31_t src)
  5646. {
  5647. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5648. return riscv_dsp_atan_q31(src);
  5649. #endif
  5650. }
  5651. static inline q15_t hpm_dsp_atan_q15(q15_t src)
  5652. {
  5653. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5654. return riscv_dsp_atan_q15(src);
  5655. #endif
  5656. }
  5657. static inline float32_t hpm_dsp_atan2_f32(float32_t srcy, float32_t src2)
  5658. {
  5659. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5660. return riscv_dsp_atan2_f32(srcy, src2);
  5661. #endif
  5662. }
  5663. static inline q15_t hpm_dsp_atan2_q15(q15_t srcy, q15_t src2)
  5664. {
  5665. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5666. return riscv_dsp_atan2_q15(srcy, src2);
  5667. #endif
  5668. }
  5669. static inline q31_t hpm_dsp_atan2_q31(q31_t srcy, q31_t src2)
  5670. {
  5671. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5672. return riscv_dsp_atan2_q31(srcy, src2);
  5673. #endif
  5674. }
  5675. // Square Root
  5676. /**
  5677. * @brief Square root of the floating-potint input.
  5678. * @param[in] src the input value.
  5679. * @return the suqare root of input.
  5680. */
  5681. static inline float32_t hpm_dsp_sqrt_f32(float32_t src)
  5682. {
  5683. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5684. return riscv_dsp_sqrt_f32(src);
  5685. #endif
  5686. }
  5687. /**
  5688. * @brief Square root of the q31 input.
  5689. * @param[in] src the input value.
  5690. * @return the suqare root of input.
  5691. */
  5692. static inline q31_t hpm_dsp_sqrt_q31(q31_t src)
  5693. {
  5694. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5695. return riscv_dsp_sqrt_q31(src);
  5696. #endif
  5697. }
  5698. /**
  5699. * @brief Square root of the q15 input.
  5700. * @param[in] src the input value.
  5701. * @return the suqare root of input.
  5702. */
  5703. static inline q15_t hpm_dsp_sqrt_q15(q15_t src)
  5704. {
  5705. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5706. return riscv_dsp_sqrt_q15(src);
  5707. #endif
  5708. }
  5709. // Convert function
  5710. /**
  5711. * @brief Convert a floating-point vector to Q15.
  5712. * @param[in] *src the input vector point.
  5713. * @param[out] *dst yhe output vector point.
  5714. * @param[in] size size of vector.
  5715. */
  5716. static inline void hpm_dsp_convert_f32_q15(float32_t *src, q15_t *dst, uint32_t size)
  5717. {
  5718. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5719. riscv_dsp_convert_f32_q15(src, dst, size);
  5720. #endif
  5721. }
  5722. /**
  5723. * @brief Convert a floating-point vector to Q31.
  5724. * @param[in] *src the input vector point.
  5725. * @param[out] *dst the output vector point.
  5726. * @param[in] size size of vectors.
  5727. */
  5728. static inline void hpm_dsp_convert_f32_q31(float32_t *src, q31_t *dst, uint32_t size)
  5729. {
  5730. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5731. #ifdef __zcc__
  5732. tpt_f32_to_q31(dst, src, size);
  5733. #else
  5734. riscv_dsp_convert_f32_q31(src, dst, size);
  5735. #endif
  5736. #endif
  5737. }
  5738. /**
  5739. * @brief Convert a floating-point vector to Q7.
  5740. * @param[in] *src the input vector point.
  5741. * @param[out] *dst the output vector point.
  5742. * @param[in] size size of vectors.
  5743. */
  5744. static inline void hpm_dsp_convert_f32_q7(float32_t *src, q7_t *dst, uint32_t size)
  5745. {
  5746. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5747. riscv_dsp_convert_f32_q7(src, dst, size);
  5748. #endif
  5749. }
  5750. /**
  5751. * @brief Convert a Q15 vector to floating.
  5752. * @param[in] *src the input vector point.
  5753. * @param[out] *dst the output vector point.
  5754. * @param[in] size size of vector.
  5755. */
  5756. static inline void hpm_dsp_convert_q15_f32(q15_t *src, float32_t *dst, uint32_t size)
  5757. {
  5758. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5759. riscv_dsp_convert_q15_f32(src, dst, size);
  5760. #endif
  5761. }
  5762. /**
  5763. * @brief Convert a Q15 vector to Q31.
  5764. * @param[in] *src the input vector point.
  5765. * @param[out] *dst the output vector point.
  5766. * @param[in] size size of vector.
  5767. */
  5768. static inline void hpm_dsp_convert_q15_q31(q15_t *src, q31_t *dst, uint32_t size)
  5769. {
  5770. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5771. riscv_dsp_convert_q15_q31(src, dst, size);
  5772. #endif
  5773. }
  5774. /**
  5775. * @brief Convert a Q15 vector to Q7.
  5776. * @param[in] *src the input vector point.
  5777. * @param[out] *dst the output vector point.
  5778. * @param[in] size size of vector.
  5779. */
  5780. static inline void hpm_dsp_convert_q15_q7(q15_t *src, q7_t *dst, uint32_t size)
  5781. {
  5782. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5783. riscv_dsp_convert_q15_q7(src, dst, size);
  5784. #endif
  5785. }
  5786. /**
  5787. * @brief Convert a Q31 vector to floating.
  5788. * @param[in] *src the input vector point.
  5789. * @param[out] *dst the output vector point.
  5790. * @param[in] size size of vector.
  5791. */
  5792. static inline void hpm_dsp_convert_q31_f32(q31_t *src, float32_t *dst, uint32_t size)
  5793. {
  5794. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5795. #ifdef __zcc__
  5796. tpt_q31_to_f32(dst, src, size);
  5797. #else
  5798. riscv_dsp_convert_q31_f32(src, dst, size);
  5799. #endif
  5800. #endif
  5801. }
  5802. /**
  5803. * @brief Convert a Q31 vector to Q15.
  5804. * @param[in] *src the input vector point.
  5805. * @param[out] *dst the output vector point.
  5806. * @param[in] size size of vector.
  5807. */
  5808. static inline void hpm_dsp_convert_q31_q15(q31_t *src, q15_t *dst, uint32_t size)
  5809. {
  5810. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5811. riscv_dsp_convert_q31_q15(src, dst, size);
  5812. #endif
  5813. }
  5814. /**
  5815. * @brief Convert a Q31 vector to Q7.
  5816. * @param[in] *src the input vector point.
  5817. * @param[out] *dst the output vector point.
  5818. * @param[in] size size of vector.
  5819. */
  5820. static inline void hpm_dsp_convert_q31_q7(q31_t *src, q7_t *dst, uint32_t size)
  5821. {
  5822. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5823. riscv_dsp_convert_q31_q7(src, dst, size);
  5824. #endif
  5825. }
  5826. /**
  5827. * @brief Convert a Q7 vector to floating.
  5828. * @param[in] *src the input vector point.
  5829. * @param[out] *dst the output vector point.
  5830. * @param[in] size size of vector.
  5831. */
  5832. static inline void hpm_dsp_convert_q7_f32(q7_t *src, float32_t *dst, uint32_t size)
  5833. {
  5834. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5835. riscv_dsp_convert_q7_f32(src, dst, size);
  5836. #endif
  5837. }
  5838. /**
  5839. * @brief Convert a Q7 vector to Q15.
  5840. * @param[in] *src the input vector point.
  5841. * @param[out] *dst the output vector point.
  5842. * @param[in] size size of vector.
  5843. */
  5844. static inline void hpm_dsp_convert_q7_q15(q7_t *src, q15_t *dst, uint32_t size)
  5845. {
  5846. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5847. riscv_dsp_convert_q7_q15(src, dst, size);
  5848. #endif
  5849. }
  5850. /**
  5851. * @brief Convert a Q7 vector to Q31.
  5852. * @param[in] *src the input vector point.
  5853. * @param[out] *dst the output vector point.
  5854. * @param[in] size size of vector.
  5855. */
  5856. static inline void hpm_dsp_convert_q7_q31(q7_t *src, q31_t *dst, uint32_t size)
  5857. {
  5858. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5859. riscv_dsp_convert_q7_q31(src, dst, size);
  5860. #endif
  5861. }
  5862. // Duplicate function
  5863. /**
  5864. * @brief Duplicate the floating vector
  5865. * @param[in] *src the input vector point.
  5866. * @param[out] *dst the output vector point.
  5867. * @param[in] size size of vectors.
  5868. */
  5869. static inline void hpm_dsp_dup_f32(float32_t *src, float32_t *dst, uint32_t size)
  5870. {
  5871. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5872. riscv_dsp_dup_f32(src, dst, size);
  5873. #endif
  5874. }
  5875. /**
  5876. * @brief Duplicate the Q15 vector
  5877. * @param[in] *src the input vector point.
  5878. * @param[out] *dst the output vector point.
  5879. * @param[in] size size of vectors.
  5880. */
  5881. static inline void hpm_dsp_dup_q15(q15_t *src, q15_t *dst, uint32_t size)
  5882. {
  5883. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5884. riscv_dsp_dup_q15(src, dst, size);
  5885. #endif
  5886. }
  5887. /**
  5888. * @brief Duplicate the Q31 vector
  5889. * @param[in] *src the input vector point.
  5890. * @param[out] *dst the output vector point.
  5891. * @param[in] size size of vectors.
  5892. */
  5893. static inline void hpm_dsp_dup_q31(q31_t *src, q31_t *dst, uint32_t size)
  5894. {
  5895. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5896. riscv_dsp_dup_q31(src, dst, size);
  5897. #endif
  5898. }
  5899. /**
  5900. * @brief Duplicate the Q7 vector
  5901. * @param[in] *src the input vector point.
  5902. * @param[out] *dst the output vector point.
  5903. * @param[in] size size of vectors.
  5904. */
  5905. static inline void hpm_dsp_dup_q7(q7_t *src, q7_t *dst, uint32_t size)
  5906. {
  5907. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5908. riscv_dsp_dup_q7(src, dst, size);
  5909. #endif
  5910. }
  5911. // Set function
  5912. /**
  5913. * @brief Set the floating-point vector.
  5914. * @param[in] val specify floating-point value.
  5915. * @param[out] *dst the output vector point.
  5916. * @param[in] size size of the vector.
  5917. */
  5918. static inline void hpm_dsp_set_f32(float32_t val, float32_t *dst, uint32_t size)
  5919. {
  5920. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5921. riscv_dsp_set_f32(val, dst, size);
  5922. #endif
  5923. }
  5924. /**
  5925. * @brief Set the Q15 vector.
  5926. * @param[in] val specify Q15 value.
  5927. * @param[out] *dst the output vector point.
  5928. * @param[in] size size of the vector.
  5929. */
  5930. static inline void hpm_dsp_set_q15(q15_t val, q15_t *dst, uint32_t size)
  5931. {
  5932. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5933. riscv_dsp_set_q15(val, dst, size);
  5934. #endif
  5935. }
  5936. /**
  5937. * @brief Set the Q31 vector.
  5938. * @param[in] val specify Q31 value.
  5939. * @param[out] *dst the output vector point.
  5940. * @param[in] size size of the vector.
  5941. */
  5942. static inline void hpm_dsp_set_q31(q31_t val, q31_t *dst, uint32_t size)
  5943. {
  5944. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5945. riscv_dsp_set_q31(val, dst, size);
  5946. #endif
  5947. }
  5948. /**
  5949. * @brief Set the Q7 vector.
  5950. * @param[in] val specify Q7 value.
  5951. * @param[out] *dst the output vector point.
  5952. * @param[in] size size of the vector.
  5953. */
  5954. static inline void hpm_dsp_set_q7(q7_t val, q7_t *dst, uint32_t size)
  5955. {
  5956. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5957. riscv_dsp_set_q7(val, dst, size);
  5958. #endif
  5959. }
  5960. /**
  5961. * @brief Weighted Sum of the floating-potint vector.
  5962. * @param[in] *src points to the input vector.
  5963. * @param[in] *weight points to the weighted vector.
  5964. * @param[in] size size of the vectors.
  5965. * @return Weighted Sumvalue.
  5966. *
  5967. */
  5968. static inline float32_t hpm_dsp_weighted_sum_f32(const float32_t *src, const float32_t *weight, uint32_t size)
  5969. {
  5970. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5971. return riscv_dsp_weighted_sum_f32(src, weight, size);
  5972. #endif
  5973. }
  5974. /**
  5975. * @brief Barycenter of the floating-potint type.
  5976. * @param[in] *src points to the input vector.
  5977. * @param[in] *weights points to the weighted vector.
  5978. * @param[out] *out points to the out vector.
  5979. * @param[in] numofvec size of the vectors.
  5980. * @param[in] dimofvec size of the vectors.
  5981. *
  5982. */
  5983. static inline void hpm_dsp_barycenter_f32(const float32_t *src, const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
  5984. {
  5985. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5986. riscv_dsp_barycenter_f32(src, weights, out, numofvec, dimofvec);
  5987. #endif
  5988. }
  5989. /**
  5990. * @brief Calculate exponential value of f32 vector.
  5991. * @param[in] src input value
  5992. * @return exponential value of the input
  5993. */
  5994. static inline float32_t hpm_dsp_exp_f32(float32_t src)
  5995. {
  5996. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  5997. return riscv_dsp_exp_f32(src);
  5998. #endif
  5999. }
  6000. #if defined (__riscv_zfh)
  6001. /**
  6002. * @brief Calculate exponential value of f16 vector.
  6003. * @param[in] src input value
  6004. * @return exponential value of the input
  6005. */
  6006. static inline float16_t hpm_dsp_exp_f16(float16_t src)
  6007. {
  6008. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6009. return riscv_dsp_exp_f16(src);
  6010. #endif
  6011. }
  6012. #endif
  6013. /**
  6014. * @brief Calculate sigmoid value of f32 vector.
  6015. * @param[in] src input value
  6016. * @return sigmoid value of the input
  6017. */
  6018. static inline float32_t hpm_dsp_sigmoid_f32(float32_t src)
  6019. {
  6020. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6021. return riscv_dsp_sigmoid_f32(src);
  6022. #endif
  6023. }
  6024. #if defined (__riscv_zfh)
  6025. /**
  6026. * @brief Calculate sigmoid value of f16 vector.
  6027. * @param[in] src input value
  6028. * @return sigmoid value of the input
  6029. */
  6030. static inline float16_t hpm_dsp_sigmoid_f16(float16_t src)
  6031. {
  6032. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6033. return riscv_dsp_sigmoid_f16(src);
  6034. #endif
  6035. }
  6036. #endif
  6037. /**
  6038. * @brief Calculate the natural logarithm value of f32 vector.
  6039. * @param[in] src input value
  6040. * @return natural logarithm value of the input
  6041. */
  6042. static inline float32_t hpm_dsp_log_f32(float32_t src)
  6043. {
  6044. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6045. return riscv_dsp_log_f32(src);
  6046. #endif
  6047. }
  6048. #if defined (__riscv_zfh)
  6049. /**
  6050. * @brief Calculate the natural logarithm value of f16 vector.
  6051. * @param[in] src input value
  6052. * @return natural logarithm value of the input
  6053. */
  6054. static inline float16_t hpm_dsp_log_f16(float16_t src)
  6055. {
  6056. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6057. return riscv_dsp_log_f16(src);
  6058. #endif
  6059. }
  6060. #endif
  6061. /**
  6062. * @}
  6063. *
  6064. */
  6065. #endif
  6066. #endif
  6067. #ifdef HPM_MATH_DSP_SORT
  6068. /**
  6069. * @defgroup sort DSP Sort Functions
  6070. * The generic sort function sorts elements of a vector by the algorithm and sorting order specified
  6071. * in its instance structure. The algorithms to be chosen from to perform the generic sorting
  6072. * include bitonic sort, bubble sort, heap sort, insertion sort, quick sort and selection sort.
  6073. * Andes DSP library only supports the generic sort function for floating-point data.
  6074. * @ingroup hpmmath
  6075. * @{
  6076. */
  6077. #ifdef HPM_EN_MATH_DSP_LIB
  6078. #include "riscv_dsp_sort_math.h"
  6079. /**
  6080. * @param[in,out] instance pointer of the instance structure
  6081. * @param[in] alg desired sorting algorithm
  6082. * @param[in] order desired sorting order
  6083. *
  6084. * @b Note:
  6085. *
  6086. * 1. This function has to be called to initialize the instance structure before the function
  6087. * riscv_dsp_sort_f32 is executed. Please refer to code examples.
  6088. *
  6089. * 2. The possible sorting algorithms for the generic sorting (i.e., options for alg) include
  6090. * - RISCV_DSP_SORT_BITONIC bitonic sort
  6091. * - RISCV_DSP_SORT_BUBBLE bubble sort
  6092. * - RISCV_DSP_SORT_HEAP heap sort
  6093. * - RISCV_DSP_SORT_INSERTION insertion sort
  6094. * - RISCV_DSP_SORT_QUICK quick sort
  6095. * - RISCV_DSP_SORT_SELECTION selection sort
  6096. *
  6097. * 3. The possible sorting orders for the generic sorting (i.e., options for order) include
  6098. * - RISCV_DSP_SORT_DESCENDING descending order
  6099. * - RISCV_DSP_SORT_ASCENDING ascending order
  6100. */
  6101. static inline void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t * instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
  6102. {
  6103. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6104. riscv_dsp_sort_init_f32(instance, alg, order);
  6105. #endif
  6106. }
  6107. /**
  6108. * @brief Generic sorting function
  6109. *
  6110. * @param[in] instance pointer of the instance structure
  6111. * @param[in] src pointer of the input vector
  6112. * @param[out] dst pointer of the output vector
  6113. * @param[in] size number of elements in a vector
  6114. *
  6115. * @b Note:
  6116. *
  6117. * 1. The possible sorting algorithms for the generic sorting (i.e., options for alg) include
  6118. * - RISCV_DSP_SORT_BITONIC bitonic sort
  6119. * - RISCV_DSP_SORT_BUBBLE bubble sort
  6120. * - RISCV_DSP_SORT_HEAP heap sort
  6121. * - RISCV_DSP_SORT_INSERTION insertion sort
  6122. * - RISCV_DSP_SORT_QUICK quick sort
  6123. * - RISCV_DSP_SORT_SELECTION selection sort
  6124. *
  6125. * 2. The possible sorting orders for the generic sorting (i.e., options for order) include
  6126. * - RISCV_DSP_SORT_DESCENDING descending order
  6127. * - RISCV_DSP_SORT_ASCENDING ascending order
  6128. *
  6129. * 3. To ensure correct results, you must initialize the instance structure with the function
  6130. * riscv_dsp_sort_init_f32 before using this function riscv_dsp_sort_f32. For
  6131. * how to use the two functions, please refer to the code examples below.
  6132. *
  6133. * @b Example
  6134. * <pre>
  6135. * With the input size as 100, sorting order as ascending and sorting algorithm as quick
  6136. * sort, the code example of generic sorting is as follows:
  6137. *
  6138. * \#define size 100
  6139. * riscv_dsp_sort_f32_t *instance;
  6140. * float32_t src[size] = {};
  6141. * float32_t dst[size];
  6142. * riscv_dsp_sort_init_f32(instance, RISCV_DSP_SORT_QUICK,
  6143. * RISCV_DSP_SORT_ASCENDING);
  6144. * riscv_dsp_sort_f32(instance, src, dst, size);
  6145. * </pre>
  6146. */
  6147. static inline void hpm_dsp_sort_f32(const riscv_dsp_sort_f32_t * instance,float32_t * src, float32_t * dst, uint32_t size)
  6148. {
  6149. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6150. riscv_dsp_sort_f32(instance, src, dst, size);
  6151. #endif
  6152. }
  6153. /**
  6154. * @param[in, out] instance pointer of the instance structure.
  6155. * @param[in] order desired sorting order
  6156. * @param[in] buf pointer of the working buffer
  6157. *
  6158. * @b Note:
  6159. *
  6160. * 1. This function has to be called to initialize the instance structure before the function
  6161. * riscv_dsp_sort_merge_f32 is executed. Please refer to Section 2.11.2.2 for a code
  6162. * example.
  6163. *
  6164. * 2. The possible sorting orders for the merge sorting (i.e., options for order) include
  6165. * - RISCV_DSP_SORT_DESCENDING descending order
  6166. * - RISCV_DSP_SORT_ASCENDING ascending order
  6167. */
  6168. static inline void hpm_dsp_sort_merge_init_f32(riscv_dsp_sort_merge_f32_t * instance, riscv_dsp_sort_order order, float32_t * buf)
  6169. {
  6170. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6171. riscv_dsp_sort_merge_init_f32(instance, order, buf);
  6172. #endif
  6173. }
  6174. /**
  6175. * @brief Merge sort
  6176. *
  6177. * @param[in] instance pointer of the instance structure.
  6178. * @param[in] src pointer of the input vector
  6179. * @param[out] dst pointer of the output vector
  6180. * @param[in] size number of elements in a vector
  6181. *
  6182. * @b Note:
  6183. *
  6184. * 1. The possible sorting orders for the merge sorting (i.e., options for order) include
  6185. * - RISCV_DSP_SORT_DESCENDING descending order
  6186. * - RISCV_DSP_SORT_ASCENDING ascending order
  6187. *
  6188. * 2. To ensure correct results, you must initialize the instance structure with the function
  6189. * riscv_dsp_sort_merge_init_f32 before using this function
  6190. * riscv_dsp_sort_merge_f32. For how to use the two functions, please refer to the
  6191. * code example below.
  6192. *
  6193. * @b Example
  6194. * <pre>
  6195. * With the input size as 100 and sorting order as descending, the code example of merge
  6196. * sorting is as follows:
  6197. *
  6198. * \#define size 100
  6199. * riscv_dsp_sort_merge_f32_t *instance;
  6200. * float32_t src[size] = {};
  6201. * float32_t buf[size];
  6202. * float32_t dst[size];
  6203. * riscv_dsp_sort_merge_init_f32(instance, RISCV_DSP_SORT_DESCENDING, buf);
  6204. * riscv_dsp_sort_merge_f32(instance, src, dst, size);
  6205. * </pre>
  6206. */
  6207. static inline void hpm_dsp_sort_merge_f32(const riscv_dsp_sort_merge_f32_t * instance, float32_t * src, float32_t * dst, uint32_t size)
  6208. {
  6209. #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
  6210. riscv_dsp_sort_merge_f32(instance, src, dst, size);
  6211. #endif
  6212. }
  6213. #endif
  6214. #endif
  6215. #ifdef HPM_MATH_NN_TINYENGINE
  6216. #ifdef HPM_EN_MATH_DSP_LIB
  6217. #include "riscv_math_types.h"
  6218. #include <string.h>
  6219. #include "riscv_simd_convert.h"
  /*
   * Split a signed shift amount into its two directions: LEFT_SHIFT yields the
   * amount when it is positive (else 0), RIGHT_SHIFT yields the negated amount
   * when it is zero or negative (else 0). The argument is fully parenthesized
   * so that expressions such as RIGHT_SHIFT(a - b) expand correctly; note that
   * the argument is still evaluated more than once.
   */
  #define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
  #define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
  /* Bit patterns 0x7FFFFFFF and 0x80000000 cast to q31_t. */
  #define Q31_MAX ((q31_t)(0x7FFFFFFFL))
  #define Q31_MIN ((q31_t)(0x80000000L))
  6224. static inline void write_q15x2_ia(
  6225. q15_t **pQ15,
  6226. q31_t value)
  6227. {
  6228. q31_t val = value;
  6229. (*pQ15)[0] = (val & 0x0FFFF);
  6230. (*pQ15)[1] = (val >> 16) & 0x0FFFF;
  6231. *pQ15 += 2;
  6232. }
  6233. /**
  6234. * @brief Read 2 q15 elements and post increment pointer.
  6235. *
  6236. * @param[in] in_q15 Pointer to pointer that holds address of input.
  6237. * @return q31 value
  6238. */
  6239. __STATIC_FORCEINLINE q31_t hpm_nn_read_q15x2_ia(const q15_t **in_q15)
  6240. {
  6241. q31_t val;
  6242. val = *(q31_t *)(*in_q15);
  6243. *in_q15 += 2;
  6244. return val;
  6245. }
  6246. /**
  6247. * @brief Saturating doubling high multiply. Result matches
  6248. * NEON instruction VQRDMULH.
  6249. * @param[in] m1 Multiplicand
  6250. * @param[in] m2 Multiplier
  6251. * @return Result of multiplication.
  6252. *
  6253. */
  6254. __STATIC_FORCEINLINE q31_t hpm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
  6255. {
  6256. q31_t result = 0;
  6257. q63_t mult = 1 << 30;
  6258. if ((m1 < 0) ^ (m2 < 0)) {
  6259. mult = 1 - mult;
  6260. }
  6261. mult = mult + (q63_t)m1 * m2;
  6262. result = mult / (1UL << 31);
  6263. if ((m1 == m2) && (m1 == (int32_t)Q31_MIN)) {
  6264. result = Q31_MAX;
  6265. }
  6266. return result;
  6267. }
  6268. /**
  6269. * @brief Rounding divide by power of two.
  6270. * @param[in] dividend - Dividend
  6271. * @param[in] exponent - Divisor = power(2, exponent)
  6272. * Range: [0, 31]
  6273. * @return Rounded result of division. Midpoint is rounded away from zero.
  6274. *
  6275. */
  6276. __STATIC_FORCEINLINE q31_t hpm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
  6277. {
  6278. q31_t result = 0;
  6279. const q31_t remainder_mask = (1l << exponent) - 1;
  6280. int32_t remainder = remainder_mask & dividend;
  6281. result = dividend >> exponent;
  6282. q31_t threshold = remainder_mask >> 1;
  6283. if (result < 0) {
  6284. threshold++;
  6285. }
  6286. if (remainder > threshold) {
  6287. result++;
  6288. }
  6289. return result;
  6290. }
  6291. __STATIC_FORCEINLINE q31_t hpm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
  6292. {
  6293. return hpm_nn_divide_by_power_of_two(hpm_nn_sat_doubling_high_mult(val * (1 << LEFT_SHIFT(shift)), multiplier),
  6294. RIGHT_SHIFT(shift));
  6295. }
  6296. /**
  6297. * @brief Read 4 q7 from q7 pointer and post increment pointer.
  6298. * @param[in] in_q7 Pointer to pointer that holds address of input.
  6299. * @return q31 value
  6300. */
  6301. __STATIC_FORCEINLINE q31_t hpm_nn_read_q7x4_ia(const q7_t **in_q7)
  6302. {
  6303. q31_t val;
  6304. val = *(q31_t *)(*in_q7);
  6305. *in_q7 += 4;
  6306. return val;
  6307. }
  6308. /**
  6309. * @brief read and expand one q7 word into two q15 words with reordering
  6310. */
  6311. __STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
  6312. {
  6313. q31_t inA = hpm_nn_read_q7x4_ia(&source);
  6314. *out2 = __SXTB16_ROR(inA, 8);
  6315. *out1 = __SXTB16(inA);
  6316. return source;
  6317. }
  6318. /**
  6319. * @brief read and expand one q7 word into two q15 words
  6320. */
  6321. __STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
  6322. {
  6323. q31_t inA = hpm_nn_read_q7x4_ia(&source);
  6324. q31_t inAbuf1 = __SXTB16_ROR(inA, 8);
  6325. q31_t inAbuf2 = __SXTB16(inA);
  6326. *out2 = __PKHTB(inAbuf1, inAbuf2, 16);
  6327. *out1 = __PKHBT(inAbuf2, inAbuf1, 16);
  6328. return source;
  6329. }
  6330. /**
  6331. * @brief Read 4 s8 from s8 pointer and post increment pointer.
  6332. * @param[in] in_s8 Pointer to pointer that holds address of input.
  6333. * @return q31 value
  6334. */
  6335. __STATIC_FORCEINLINE int32_t hpm_nn_read_s8x4_ia(const int8_t **in_s8)
  6336. {
  6337. int32_t val;
  6338. val = *(int32_t *)(*in_s8);
  6339. *in_s8 += 4;
  6340. return val;
  6341. }
  6342. __STATIC_FORCEINLINE void hpm_nn_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
  6343. {
  6344. int32_t block_cnt;
  6345. /* Run the below code for cores that support SIMD instructions */
  6346. int32_t in_q7x4;
  6347. int32_t in_q15x2_1;
  6348. int32_t in_q15x2_2;
  6349. int32_t out_q15x2_1;
  6350. int32_t out_q15x2_2;
  6351. /*loop unrolling */
  6352. block_cnt = block_size >> 2;
  6353. /* First part of the processing with loop unrolling. Compute 4 outputs at a time. */
  6354. const int32_t offset_q15x2 = __PKHBT(offset, offset, 16);
  6355. while (block_cnt > 0) {
  6356. /* convert from s8 to s16 and then store the results in the destination buffer */
  6357. in_q7x4 = hpm_nn_read_s8x4_ia(&src);
  6358. /* Extract and sign extend each of the four s8 values to s16 */
  6359. in_q15x2_1 = __SXTAB16(offset_q15x2, __ROR(in_q7x4, 8));
  6360. in_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4);
  6361. out_q15x2_2 = __PKHTB(in_q15x2_1, in_q15x2_2, 16);
  6362. out_q15x2_1 = __PKHBT(in_q15x2_2, in_q15x2_1, 16);
  6363. write_q15x2_ia(&dst, out_q15x2_1);
  6364. write_q15x2_ia(&dst, out_q15x2_2);
  6365. block_cnt--;
  6366. }
  6367. /* Handle left over samples */
  6368. block_cnt = block_size % 0x4;
  6369. while (block_cnt > 0) {
  6370. *dst++ = (int16_t)*src++ + offset;
  6371. /* Decrement the loop counter */
  6372. block_cnt--;
  6373. }
  6374. }
  6375. #endif
  6376. #endif
  6377. #ifdef HPM_MATH_NN_ACTIVATION
  6378. #ifdef HPM_EN_MATH_NN_LIB
  6379. #if defined(__zcc__)
  6380. #include "tpt_nn_activation.h"
  6381. #else
  6382. #include "riscv_nn_activation.h"
  6383. #endif
  6384. /**
  6385. * @defgroup nnactivation NN Activation Functions
  6386. * @ingroup hpmmath
  6387. * @brief The activation functions are used to filter out some input data. They
  6388. * include sigmoid, tanh and ReLU (Rectified Linear Unit) functions.
  6389. *
  6390. * @{
  6391. */
  6392. /**
  6393. * @brief This function uses the sigmoid or tanh function to perform
  6394. * activation for signed 8-bit integer input vectors.
  6395. * @param[in,out] in_out pointer of the input/output vector
  6396. * @param[in] size number of elements in the input/output vector
  6397. * @param[in] int_bits number of the bits in the integer part, which is
  6398. * supposed to be smaller than 4
  6399. * @param[in] act_fun selection of activation functions. See the Note
  6400. * below for details.
  6401. *
  6402. * @note
  6403. * The available activation functions for selection include:
  6404. * - NN_SIGMOID: Use the sigmoid activation function
  6405. * - NN_TANH: Use the tanh activation function
  6406. *
  6407. * @b Example:
  6408. * @code
  6409. * #define SIZE 32
  6410. * q7_t in_out[SIZE] = {...};
  6411. * hpm_nn_activate_s8(in_out, SIZE, 0, NN_SIGMOID);
  6412. * @endcode
  6413. */
  6414. static inline void hpm_nn_activate_s8(q7_t *in_out,
  6415. uint32_t size,
  6416. uint16_t int_bits,
  6417. riscv_nn_activation_fun act_fun)
  6418. {
  6419. #if defined(__zcc__)
  6420. tpt_nn_activate_s8(in_out, size, int_bits, act_fun);
  6421. #else
  6422. riscv_nn_activate_s8(in_out, size, int_bits, act_fun);
  6423. #endif
  6424. }
  6425. /**
  6426. * @brief This function uses sigmoid or tanh function to perform
  6427. * activation for signed 16-bit integer input vectors.
  6428. * @param[in,out] in_out pointer of the input/output vector
  6429. * @param[in] size number of elements in the input/output vector
  6430. * @param[in] int_bits number of the bits in the integer part, which is
  6431. * supposed to be smaller than 4
  6432. * @param[in] act_fun selection of activation functions. See the Note
  6433. * below for details.
  6434. *
  6435. * @note
  6436. * The availbale activation functions for selection include:
  6437. * - NN_SIGMOID: Use the sigmoid activation function
  6438. * - NN_TANH: Use the tanh activation function
  6439. */
  6440. static inline void hpm_nn_activate_s16(q15_t *in_out,
  6441. uint32_t size,
  6442. uint16_t int_bits,
  6443. riscv_nn_activation_fun act_fun)
  6444. {
  6445. #if defined(__zcc__)
  6446. tpt_nn_activate_s16(in_out, size, int_bits, act_fun);
  6447. #else
  6448. riscv_nn_activate_s16(in_out, size, int_bits, act_fun);
  6449. #endif
  6450. }
  6451. /**
  6452. * @brief This function uses the leaky ReLU function to perform
  6453. * activation for signed 8-bit integer input vectors.
  6454. * @param[in,out] in_out pointer of the input/output vector
  6455. * @param[in] size number of elements in the input/output vector
  6456. * @param[in] slope slope value to be multiplied with the negative
  6457. * inputs. The result will be right shifted 15 bits
  6458. * to scale back to signed 8-bit integer.
  6459. *
  6460. * @b Example:
  6461. * @code
  6462. * #define SIZE 1024
  6463. * q15_t slope = 16384;
  6464. * q7_t in_out[SIZE] = {...};
  6465. * hpm_nn_leaky_relu_s8(in_out, SIZE, slope);
  6466. * @endcode
  6467. */
  6468. static inline void hpm_nn_leaky_relu_s8(q7_t *in_out,
  6469. uint32_t size,
  6470. q15_t slope)
  6471. #if defined(__zcc__)
  6472. tpt_nn_leaky_relu_q7(in_out, in_out, size, slope);
  6473. #else
  6474. riscv_nn_leaky_relu_s8(in_out, size, slope);
  6475. #endif
  6476. }
  6477. /**
  6478. * @brief This function uses the ReLU function to perform activation
  6479. * for signed 8-bit integer input vectors.
  6480. * @param[in,out] data pointer of the input/output vector
  6481. * @param[in] size number of elements in the input/output vector
  6482. * @param[in] max_val maximum value to limit the output vector
  6483. */
  6484. static inline void hpm_nn_relu_any_s8(q7_t *data, uint16_t size, q7_t max_val)
  6485. {
  6486. #if defined(__zcc__)
  6487. tpt_nn_relu_any_q7(data, size, max_val);
  6488. #else
  6489. riscv_nn_relu_any_s8(data, size, max_val);
  6490. #endif
  6491. }
  6492. /**
  6493. * @brief This function uses the ReLU function to perform activation
  6494. * for signed 8-bit integer input vectors.
  6495. * @param[in,out] in_out pointer of the input/output vector
  6496. * @param[in] size number of elements in the input/output vector
  6497. *
  6498. * @b Example:
  6499. * @code
  6500. * #define H 16
  6501. * #define W 16
  6502. * #define CH 5
  6503. * #define NUM (H * W *CH)
  6504. * q7_t in_out[NUM] = {...};
  6505. * hpm_nn_relu_s8(in_out, NUM);
  6506. * @endcode
  6507. */
  6508. static inline void hpm_nn_relu_s8(q7_t *in_out, uint32_t size)
  6509. {
  6510. #if defined(__zcc__)
  6511. tpt_nn_relu_q7(in_out, size);
  6512. #else
  6513. riscv_nn_relu_s8(in_out, size);
  6514. #endif
  6515. }
  6516. /**
  6517. * @brief This function uses the ReLU function to perform activation
  6518. * for signed 16-bit integer input vectors.
  6519. * @param[in,out] in_out pointer of the input/output vector
  6520. * @param[in] size number of elements in the input/output vector
  6521. */
  6522. static inline void hpm_nn_relu_s16(q15_t *in_out, uint32_t size)
  6523. {
  6524. #if defined(__zcc__)
  6525. tpt_nn_relu_q15(in_out, size);
  6526. #else
  6527. riscv_nn_relu_s16(in_out, size);
  6528. #endif
  6529. }
  6530. #ifdef __riscv_zfh
  6531. /**
  6532. * @brief This function uses the sigmoid function to perform
  6533. * activation for 16-bit half-precision floating point input
  6534. * vectors.
  6535. * @param[in] in_vec pointer of the input vector
  6536. * @param[in] size number of elements in the input/output vector
  6537. * @param[out] out_vec pointer of the output vector
  6538. * @return This function returns 0.
  6539. */
  6540. static inline int32_t hpm_nn_sigmoid_f16(const float16_t *in_vec,
  6541. uint32_t size,
  6542. float16_t *out_vec)
  6543. {
  6544. #if defined(__zcc__)
  6545. return tpt_nn_sigmoid_f16(in_vec, size, out_vec);
  6546. #else
  6547. return riscv_nn_sigmoid_f16(in_vec, size, out_vec);
  6548. #endif
  6549. }
  6550. /**
  6551. * @brief This function uses the tanh function to perform activation
  6552. * for 16-bit half-precision floating point input vectors.
  6553. * @param[in] in_vec pointer of the input vector
  6554. * @param[in] size number of elements in the input/output vector
  6555. * @param[out] out_vec pointer of the output vector
  6556. * @return This function returns 0.
  6557. */
  6558. static inline int32_t hpm_nn_tanh_f16(const float16_t *in_vec,
  6559. uint32_t size,
  6560. float16_t *out_vec)
  6561. {
  6562. #if defined(__zcc__)
  6563. return tpt_nn_tanh_f16(in_vec, size, out_vec);
  6564. #else
  6565. return riscv_nn_tanh_f16(in_vec, size, out_vec);
  6566. #endif
  6567. }
  6568. #endif
  6569. /**
  6570. * * @}
  6571. */
  6572. #endif
  6573. #endif
  6574. #ifdef HPM_MATH_NN_BASIC
  6575. #ifdef HPM_EN_MATH_NN_LIB
  6576. #if defined(__zcc__)
  6577. #include "tpt_nn_basic.h"
  6578. #else
  6579. #include "riscv_nn_basic.h"
  6580. #endif
  6581. /**
  6582. * @defgroup nnbasic NN Basic Functions
  6583. * @ingroup hpmmath
  6584. * @brief The basic functions are used to perform element-wise basic arithmetic
  6585. * operations.
  6586. *
  6587. * @{
  6588. */
  6589. /**
  6590. * @brief This function performs element-wise addition for signed
  6591. * 8-bit integer input vectors with two-stage shift.
  6592. * @param[in] in_tensor1 pointer of the first input vector
  6593. * @param[in] in_tensor2 pointer of the second input vector
  6594. * @param[in] scale1 pointer of the first scaling vector
  6595. * @param[in] scale2 pointer of the second scaling vector
  6596. * @param[in] size number of elements in the input vectors
  6597. * @param[in] pre_rshift right shift amount for the accumulator before
  6598. * the scaling
  6599. * @param[in] out_scale scaling value for the accumulator
  6600. * @param[in] post_rshift right shift amount for the accumulator after the
  6601. * scaling
  6602. * @param[out] out pointer of the element-wise addition results
  6603. *
  6604. * @b Example:
  6605. * @code
  6606. * #define SIZE 1024
  6607. * uint16_t pre_rshift = 8; // The addition results of both scaled input
  6608. * // tensors are in the range of 24-bit; thus, the
  6609. * // pre_rshift should be in the range of [0, 24].
  6610. * // Here we scale down the results into 16-bit
  6611. * // range.
  6612. * uint16_t out_scale = 3; // Scale up the result into 18-bit range.
  6613. * uint16_t post_rshift = 11; // Scale down the result into 7-bit range.
  6614. *
  6615. * q7_t in_tensor1[SIZE] = {...};
  6616. * q7_t in_tensor2[SIZE] = {...};
  6617. * q15_t scale1[SIZE] = {...};
  6618. * q15_t scale2[SIZE] = {...};
  6619. * q7_t out[SIZE];
  6620. *
  6621. * hpm_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2, SIZE, pre_rshift,
  6622. * out_scale, post_rshift, out);
  6623. * @endcode
  6624. */
  6625. static inline void hpm_nn_add_s8_sym(const q7_t *in_tensor1,
  6626. const q7_t *in_tensor2,
  6627. const int16_t *scale1,
  6628. const int16_t *scale2,
  6629. const uint32_t size,
  6630. const uint16_t pre_rshift,
  6631. const uint16_t out_scale,
  6632. const uint16_t post_rshift,
  6633. q7_t *out)
  6634. {
  6635. #if defined(__zcc__)
  6636. tpt_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2, size, pre_rshift,
  6637. out_scale, post_rshift, out);
  6638. #else
  6639. riscv_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2, size, pre_rshift,
  6640. out_scale, post_rshift, out);
  6641. #endif
  6642. }
  6643. /**
  6644. * @brief This function performs element-wise addition for signed
  6645. * 8-bit integer input vectors with two-stage shift with
  6646. * rounding.
  6647. * @param[in] in_tensor1 pointer of the first input vector
  6648. * @param[in] in_tensor2 pointer of the second input vector
  6649. * @param[in] scale1 scaling value for the first input vector. It
  6650. * should be in the range of 0 to {2^23}.
  6651. * @param[in] scale2 scaling value for the second input vector. It
  6652. * should be in the range of 0 to {2^23}.
  6653. * @param[in] size number of elements in the input vectors
  6654. * @param[in] pre_rshift right shift amount for the accumulator before
  6655. * the scaling
  6656. * @param[in] out_scale scaling value for the accumulator
  6657. * @param[in] post_rshift right shift amount for the accumulator after the
  6658. * scaling
  6659. * @param[out] out pointer of element-wise addition results
  6660. *
  6661. */
  6662. static inline void hpm_nn_add_s8_sym_round(const q7_t *in_tensor1,
  6663. const q7_t *in_tensor2,
  6664. const uint32_t scale1,
  6665. const uint32_t scale2,
  6666. const uint32_t size,
  6667. const uint16_t pre_rshift,
  6668. const uint16_t out_scale,
  6669. const uint16_t post_rshift,
  6670. q7_t *out)
  6671. {
  6672. #if defined(__zcc__)
  6673. tpt_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2, size,
  6674. pre_rshift, out_scale, post_rshift, out);
  6675. #else
  6676. riscv_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2, size,
  6677. pre_rshift, out_scale, post_rshift, out);
  6678. #endif
  6679. }
  /**
   * @brief Element-wise addition of two signed 8-bit integer vectors.
   *
   * @param[in]  in_tensor1  pointer of the first input vector
   * @param[in]  in_tensor2  pointer of the second input vector
   * @param[in]  in_offset1  offset value for the first input vector; it
   *                         should be in the range of -127 to 128
   * @param[in]  in_scale1   scaling value for the first input vector
   * @param[in]  in_rshift1  right shift amount for the first input vector
   * @param[in]  in_offset2  offset value for the second input vector; it
   *                         should be in the range of -127 to 128
   * @param[in]  in_scale2   scaling value for the second input vector
   * @param[in]  in_rshift2  right shift amount for the second input vector
   * @param[in]  lshift      left shift amount for the first and second input
   *                         vectors
   * @param[out] out         pointer of the element-wise addition results
   * @param[in]  out_offset  offset value for the output
   * @param[in]  out_scale   scaling value for the output
   * @param[in]  out_rshift  right shift amount for the output
   * @param[in]  act_min     minimum value that the output is limited to
   * @param[in]  act_max     maximum value that the output is limited to
   * @param[in]  size        number of elements in the input vectors
   *
   * @return This function returns 0.
   *
   * @b Example:
   * @code
   * #define SIZE 1024
   * int32_t in_offset1 = 16;        // Offset for in_tensor1
   * int32_t in_scale1 = (1<<28);    // Scale down in_tensor1 by 1/2^3
   * int32_t in_rshift1 = 3;         // Scale down in_tensor1 by 1/2^3
   * int32_t in_offset2 = 17;        // Offset for in_tensor2
   * int32_t in_scale2 = (1<<28);    // Scale down in_tensor2 by 1/2^3
   * int32_t in_rshift2 = 3;         // Scale down in_tensor2 by 1/2^3
   * int32_t lshift = 10;            // Scale up the input tensors by 2^10 times
   * int32_t out_offset = 18;        // Offset for the output tensor
   * int32_t out_scale = (1<<30);    // Scale down the output tensor by 1/2
   * int32_t out_rshift = 4;         // Scale down the output tensor by 1/2^4
   * int32_t act_min = 0xffffffa3;   // Limit the outputs in the range of
   *                                 // [0xffffffa3, 0x0000005d]
   * int32_t act_max = 0x0000005d;   // Limit the outputs in the range of
   *                                 // [0xffffffa3, 0x0000005d]
   *
   * int8_t in_tensor1[SIZE] = {...};
   * int8_t in_tensor2[SIZE] = {...};
   * int8_t out[SIZE];
   *
   * hpm_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
   *     in_rshift1, in_offset2, in_scale2, in_rshift2, lshift, out, out_offset,
   *     out_scale, out_rshift, act_min, act_max, SIZE);
   * @endcode
   */
  static inline int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1,
                                          const int8_t *in_tensor2,
                                          const int32_t in_offset1,
                                          const int32_t in_scale1,
                                          const int32_t in_rshift1,
                                          const int32_t in_offset2,
                                          const int32_t in_scale2,
                                          const int32_t in_rshift2,
                                          const int32_t lshift,
                                          int8_t *out,
                                          const int32_t out_offset,
                                          const int32_t out_scale,
                                          const int32_t out_rshift,
                                          const int32_t act_min,
                                          const int32_t act_max,
                                          const uint32_t size)
  {
  #ifdef __zcc__
      /* __zcc__ builds forward to the tpt_* implementation. */
      return tpt_nn_ew_add_s8_asym(in_tensor1, in_tensor2,
                                   in_offset1, in_scale1, in_rshift1,
                                   in_offset2, in_scale2, in_rshift2,
                                   lshift,
                                   out, out_offset, out_scale, out_rshift,
                                   act_min, act_max,
                                   size);
  #else
      /* All other builds forward to the riscv_nn_* implementation. */
      return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2,
                                     in_offset1, in_scale1, in_rshift1,
                                     in_offset2, in_scale2, in_rshift2,
                                     lshift,
                                     out, out_offset, out_scale, out_rshift,
                                     act_min, act_max,
                                     size);
  #endif /* __zcc__ */
  }
  /**
   * @brief Element-wise multiplication of two signed 8-bit integer vectors.
   *
   * @param[in]  in_tensor1  pointer of the first input vector
   * @param[in]  in_tensor2  pointer of the second input vector
   * @param[in]  in_offset1  offset value for the first input vector; it
   *                         should be in the range of -127 to 128
   * @param[in]  in_offset2  offset value for the second input vector; it
   *                         should be in the range of -127 to 128
   * @param[out] out         pointer of the element-wise multiplication results
   * @param[in]  out_offset  offset value for the output
   * @param[in]  out_scale   scaling value for the output
   * @param[in]  out_shift   shift amount for the output
   * @param[in]  act_min     minimum value that the output is limited to
   * @param[in]  act_max     maximum value that the output is limited to
   * @param[in]  size        number of elements in the input vectors
   *
   * @return This function returns 0.
   *
   * @b Example:
   * @code
   * #define SIZE 1024
   * int32_t in_offset1 = 16;        // Offset for in_tensor1
   * int32_t in_offset2 = 17;        // Offset for in_tensor2
   * int32_t out_offset = 18;        // Offset for the output tensor
   * int32_t out_scale = (1<<30);    // Scale down the output tensor by 1/2
   * int32_t out_shift = -4;         // Scale down the output tensor by 1/2^4
   * int32_t act_min = 0xffffffa3;   // Limit the outputs in the range of
   *                                 // [0xffffffa3, 0x0000005d]
   * int32_t act_max = 0x0000005d;   // Limit the outputs in the range of
   *                                 // [0xffffffa3, 0x0000005d]
   *
   * int8_t in_tensor1[SIZE] = {...};
   * int8_t in_tensor2[SIZE] = {...};
   * int8_t out[SIZE];
   *
   * hpm_nn_ew_mul_s8_asym(in_tensor1, in_tensor2, in_offset1, in_offset2, out,
   *     out_offset, out_scale, out_shift, act_min, act_max, SIZE);
   * @endcode
   */
  static inline int hpm_nn_ew_mul_s8_asym(const int8_t *in_tensor1,
                                          const int8_t *in_tensor2,
                                          const int32_t in_offset1,
                                          const int32_t in_offset2,
                                          int8_t *out,
                                          const int32_t out_offset,
                                          const int32_t out_scale,
                                          const int32_t out_shift,
                                          const int32_t act_min,
                                          const int32_t act_max,
                                          const uint32_t size)
  {
  #ifdef __zcc__
      /* __zcc__ builds forward to the tpt_* implementation. */
      return tpt_nn_ew_mul_s8_asym(in_tensor1, in_tensor2,
                                   in_offset1, in_offset2,
                                   out, out_offset, out_scale, out_shift,
                                   act_min, act_max,
                                   size);
  #else
      /* All other builds forward to the riscv_nn_* implementation. */
      return riscv_nn_ew_mul_s8_asym(in_tensor1, in_tensor2,
                                     in_offset1, in_offset2,
                                     out, out_offset, out_scale, out_shift,
                                     act_min, act_max,
                                     size);
  #endif /* __zcc__ */
  }
  6821. /**
  6822. * * @}
  6823. */
  6824. #endif
  6825. #ifdef HPM_EN_MATH_NN_RVP32_LIB
  6826. #if defined(__zcc__)
  6827. #include "tpt_nn_basic.h"
  6828. #else
  6829. #include "riscv_nn_basic.h"
  6830. #endif
  /**
   * @brief Element-wise addition of two signed 8-bit integer vectors
   *        (variant compiled when HPM_EN_MATH_NN_RVP32_LIB is defined).
   *
   * @param[in]  in_tensor1  pointer of the first input vector
   * @param[in]  in_tensor2  pointer of the second input vector
   * @param[in]  in_offset1  offset value for the first input vector; it
   *                         should be in the range of -127 to 128
   * @param[in]  in_scale1   scaling value for the first input vector
   * @param[in]  in_rshift1  right shift amount for the first input vector
   * @param[in]  in_offset2  offset value for the second input vector; it
   *                         should be in the range of -127 to 128
   * @param[in]  in_scale2   scaling value for the second input vector
   * @param[in]  in_rshift2  right shift amount for the second input vector
   * @param[in]  lshift      left shift amount for the first and second input
   *                         vectors
   * @param[out] out         pointer of the element-wise addition results
   * @param[in]  out_offset  offset value for the output
   * @param[in]  out_scale   scaling value for the output
   * @param[in]  out_rshift  right shift amount for the output
   * @param[in]  act_min     minimum value that the output is limited to
   * @param[in]  act_max     maximum value that the output is limited to
   * @param[in]  size        number of elements in the input vectors
   *
   * @return This function returns 0.
   *
   * @b Example:
   * @code
   * #define SIZE 1024
   * int32_t in_offset1 = 16;        // Offset for in_tensor1
   * int32_t in_scale1 = (1<<28);    // Scale down in_tensor1 by 1/2^3
   * int32_t in_rshift1 = 3;         // Scale down in_tensor1 by 1/2^3
   * int32_t in_offset2 = 17;        // Offset for in_tensor2
   * int32_t in_scale2 = (1<<28);    // Scale down in_tensor2 by 1/2^3
   * int32_t in_rshift2 = 3;         // Scale down in_tensor2 by 1/2^3
   * int32_t lshift = 10;            // Scale up the input tensors by 2^10 times
   * int32_t out_offset = 18;        // Offset for the output tensor
   * int32_t out_scale = (1<<30);    // Scale down the output tensor by 1/2
   * int32_t out_rshift = 4;         // Scale down the output tensor by 1/2^4
   * int32_t act_min = 0xffffffa3;   // Limit the outputs in the range of
   *                                 // [0xffffffa3, 0x0000005d]
   * int32_t act_max = 0x0000005d;   // Limit the outputs in the range of
   *                                 // [0xffffffa3, 0x0000005d]
   *
   * int8_t in_tensor1[SIZE] = {...};
   * int8_t in_tensor2[SIZE] = {...};
   * int8_t out[SIZE];
   *
   * hpm_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
   *     in_rshift1, in_offset2, in_scale2, in_rshift2, lshift, out, out_offset,
   *     out_scale, out_rshift, act_min, act_max, SIZE);
   * @endcode
   */
  static inline int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1,
                                          const int8_t *in_tensor2,
                                          const int32_t in_offset1,
                                          const int32_t in_scale1,
                                          const int32_t in_rshift1,
                                          const int32_t in_offset2,
                                          const int32_t in_scale2,
                                          const int32_t in_rshift2,
                                          const int32_t lshift,
                                          int8_t *out,
                                          const int32_t out_offset,
                                          const int32_t out_scale,
                                          const int32_t out_rshift,
                                          const int32_t act_min,
                                          const int32_t act_max,
                                          const uint32_t size)
  {
  #ifdef __zcc__
      /*
       * tpt_elementwise_add_s8 is called with the output-related arguments
       * first and with the output right shift negated.
       * NOTE(review): presumably it takes a signed shift where a negative
       * value means a right shift -- confirm against its declaration.
       */
      return tpt_elementwise_add_s8(out, out_offset, out_scale, -out_rshift,
                                    act_min, act_max,
                                    in_tensor1, in_tensor2,
                                    in_offset1, in_scale1, in_rshift1,
                                    in_offset2, in_scale2, in_rshift2,
                                    lshift,
                                    size);
  #else
      /* All other builds forward to the riscv_nn_* implementation. */
      return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2,
                                     in_offset1, in_scale1, in_rshift1,
                                     in_offset2, in_scale2, in_rshift2,
                                     lshift,
                                     out, out_offset, out_scale, out_rshift,
                                     act_min, act_max,
                                     size);
  #endif /* __zcc__ */
  }
  6912. #endif
  6913. #endif
  6914. #ifdef HPM_MATH_NN_CONCATENATION
  6915. #ifdef HPM_EN_MATH_NN_LIB
  6916. #if defined(__zcc__)
  6917. #include "tpt_nn_concatenation.h"
  6918. #else
  6919. #include "riscv_nn_concatenation.h"
  6920. #endif
  6921. /**
  6922. * @defgroup nnconcatenation NN Concatenation Functions
  6923. * @ingroup hpmmath
  6924. * @brief The concatenation functions are used to concatenate the tensor along
  6925. * the specified axis.
  6926. *
  6927. * @{
  6928. */
  /**
   * @brief Concatenate an int8_t/uint8_t input tensor with the output tensor
   *        along the w-axis.
   *
   * @param[in]  in_tensor     pointer of the input tensor
   * @param[in]  in_tensor_x   x dimension of the input tensor
   * @param[in]  in_tensor_y   y dimension of the input tensor
   * @param[in]  in_tensor_z   z dimension of the input tensor
   * @param[in]  in_tensor_w   w dimension of the input tensor
   * @param[out] out_tensor    pointer of the output tensor
   * @param[in]  out_offset_w  offset value to be added to the w axis of the
   *                           output tensor before the concatenation
   *
   * @note
   * The x, y and z dimensions of the output tensor will be the same as those
   * of the input tensor.
   */
  static inline void hpm_nn_concate_s8_w(const int8_t *in_tensor,
                                         const uint16_t in_tensor_x,
                                         const uint16_t in_tensor_y,
                                         const uint16_t in_tensor_z,
                                         const uint16_t in_tensor_w,
                                         int8_t *out_tensor,
                                         const uint32_t out_offset_w)
  {
  #ifdef __zcc__
      /* tpt_concatenation_s8_w is called with the output tensor first. */
      tpt_concatenation_s8_w(out_tensor, in_tensor,
                             in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                             out_offset_w);
  #else
      /* All other builds forward to the riscv_nn_* implementation. */
      riscv_nn_concate_s8_w(in_tensor,
                            in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                            out_tensor, out_offset_w);
  #endif /* __zcc__ */
  }
  /**
   * @brief Concatenate an int8_t/uint8_t input tensor with the output tensor
   *        along the x-axis.
   *
   * @param[in]  in_tensor     pointer of the input tensor
   * @param[in]  in_tensor_x   x dimension of the input tensor
   * @param[in]  in_tensor_y   y dimension of the input tensor
   * @param[in]  in_tensor_z   z dimension of the input tensor
   * @param[in]  in_tensor_w   w dimension of the input tensor
   * @param[out] out_tensor    pointer of the output tensor
   * @param[in]  out_tensor_x  x dimension of the output tensor
   * @param[in]  out_offset_x  offset value to be added to the x axis of the
   *                           output tensor before the concatenation
   *
   * @note
   * The y, z and w dimensions of the output tensor will be the same as those
   * of the input tensor.
   */
  static inline void hpm_nn_concate_s8_x(const int8_t *in_tensor,
                                         const uint16_t in_tensor_x,
                                         const uint16_t in_tensor_y,
                                         const uint16_t in_tensor_z,
                                         const uint16_t in_tensor_w,
                                         int8_t *out_tensor,
                                         const uint16_t out_tensor_x,
                                         const uint32_t out_offset_x)
  {
  #ifdef __zcc__
      /* __zcc__ builds forward to the tpt_* implementation. */
      tpt_nn_concate_s8_x(in_tensor,
                          in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                          out_tensor, out_tensor_x, out_offset_x);
  #else
      /* All other builds forward to the riscv_nn_* implementation. */
      riscv_nn_concate_s8_x(in_tensor,
                            in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                            out_tensor, out_tensor_x, out_offset_x);
  #endif /* __zcc__ */
  }
  /**
   * @brief Concatenate an int8_t/uint8_t input tensor with the output tensor
   *        along the y-axis.
   *
   * @param[in]  in_tensor     pointer of the input tensor
   * @param[in]  in_tensor_x   x dimension of the input tensor
   * @param[in]  in_tensor_y   y dimension of the input tensor
   * @param[in]  in_tensor_z   z dimension of the input tensor
   * @param[in]  in_tensor_w   w dimension of the input tensor
   * @param[out] out_tensor    pointer of the output tensor
   * @param[in]  out_tensor_y  y dimension of the output tensor
   * @param[in]  out_offset_y  offset value to be added to the y axis of the
   *                           output tensor before the concatenation
   *
   * @note
   * The x, z and w dimensions of the output tensor will be the same as those
   * of the input tensor.
   */
  static inline void hpm_nn_concate_s8_y(const int8_t *in_tensor,
                                         const uint16_t in_tensor_x,
                                         const uint16_t in_tensor_y,
                                         const uint16_t in_tensor_z,
                                         const uint16_t in_tensor_w,
                                         int8_t *out_tensor,
                                         const uint16_t out_tensor_y,
                                         const uint32_t out_offset_y)
  {
  #ifdef __zcc__
      /* __zcc__ builds forward to the tpt_* implementation. */
      tpt_nn_concate_s8_y(in_tensor,
                          in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                          out_tensor, out_tensor_y, out_offset_y);
  #else
      /* All other builds forward to the riscv_nn_* implementation. */
      riscv_nn_concate_s8_y(in_tensor,
                            in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                            out_tensor, out_tensor_y, out_offset_y);
  #endif /* __zcc__ */
  }
  /**
   * @brief Concatenate an int8_t/uint8_t input tensor with the output tensor
   *        along the z-axis.
   *
   * @param[in]  in_tensor     pointer of the input tensor
   * @param[in]  in_tensor_x   x dimension of the input tensor
   * @param[in]  in_tensor_y   y dimension of the input tensor
   * @param[in]  in_tensor_z   z dimension of the input tensor
   * @param[in]  in_tensor_w   w dimension of the input tensor
   * @param[out] out_tensor    pointer of the output tensor
   * @param[in]  out_tensor_z  z dimension of the output tensor
   * @param[in]  out_offset_z  offset value to be added to the z axis of the
   *                           output tensor before the concatenation
   *
   * @note
   * The x, y and w dimensions of the output tensor will be the same as those
   * of the input tensor.
   */
  static inline void hpm_nn_concate_s8_z(const int8_t *in_tensor,
                                         const uint16_t in_tensor_x,
                                         const uint16_t in_tensor_y,
                                         const uint16_t in_tensor_z,
                                         const uint16_t in_tensor_w,
                                         int8_t *out_tensor,
                                         const uint16_t out_tensor_z,
                                         const uint32_t out_offset_z)
  {
  #ifdef __zcc__
      /* __zcc__ builds forward to the tpt_* implementation. */
      tpt_nn_concate_s8_z(in_tensor,
                          in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                          out_tensor, out_tensor_z, out_offset_z);
  #else
      /* All other builds forward to the riscv_nn_* implementation. */
      riscv_nn_concate_s8_z(in_tensor,
                            in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                            out_tensor, out_tensor_z, out_offset_z);
  #endif /* __zcc__ */
  }
  7063. /**
  7064. * * @}
  7065. */
  7066. #endif
  7067. #endif
  7068. #ifdef HPM_MATH_NN_CONVOLUTION
  7069. #ifdef HPM_EN_MATH_NN_LIB
  7070. #if defined(__zcc__)
  7071. #include "tpt_nn_convolution.h"
  7072. #else
  7073. #include "riscv_nn_convolution.h"
  7074. #endif
  7075. /**
  7076. * @defgroup nnconvolution NN Convolution Functions
  7077. * @ingroup hpmmath
  7078. * @brief The convolution functions transform the input matrix into a column
  7079. * vector with im2col, and then use matrix-matrix multiplication to get the
  7080. * convolution result.
  7081. *
  7082. * @{
  7083. */
  7084. /**
  7085. * @brief This function performs 1x1 kernels convolution for signed
  7086. * 8-bit integer inputs/outputs in any x and y dimensions with
  7087. * shift-based quantization on the outputs.
  7088. * @param[in] in_tensor pointer of the input tensor
  7089. * @param[in] in_tensor_dim_x x dimension of the input tensor
  7090. * @param[in] in_tensor_dim_y y dimension of the input tensor
  7091. * @param[in] in_tensor_ch number of input tensor channels
  7092. * @param[in] ker_weight pointer of kernel weights
  7093. * @param[in] out_tensor_ch number of output tensor channels
  7094. * @param[in] ker_dim_x x dimension of the filter kernel
  7095. * @param[in] ker_dim_y y dimension of the filter kernel
  7096. * @param[in] pad_x padding size in the x dimension
  7097. * @param[in] pad_y padding size in the y dimension
  7098. * @param[in] stride_x convolution stride in the x dimension
  7099. * @param[in] stride_y convolution stride in the y dimension
  7100. * @param[in] bias pointer of the bias vector
  7101. * @param[in] bias_lshift left shift amount for the bias
  7102. * @param[in] out_rshift right shift amount for the output
  7103. * @param[out] out_tensor pointer of the output tensor
  7104. * @param[in] out_tensor_dim_x x dimension of the output tensor
  7105. * @param[in] out_tensor_dim_y y dimension of the output tensor
  7106. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  7107. * It is required when -mext-dsp or
  7108. * -mext-vector is enabled and its size
  7109. * must be equal to "2 * in_tensor_ch *
  7110. * ker_dim_x * ker_dim_y".
  7111. * @param[in] tmp_buf dummy
  7112. * @return This function returns 0 on success; otherwise, it returns -1
  7113. * if its inputs do not meet the constraints (see the Note
  7114. * below for details).
  7115. *
  7116. * @note
  7117. * - The input constraints of this function are:
  7118. * - in_tensor_ch is a multiple of 4
  7119. * - out_tensor_ch is a multiple of 2
  7120. * - ker_dim_x is 1
  7121. * - ker_dim_y is 1
  7122. * - pad_x is 0
  7123. * - pad_y is 0
  7124. * - stride_x is 1
  7125. * - stride_y is 1
  7126. *
  7127. * @b Example:
  7128. * @code
  7129. * //Convolve a 160x120x20 input tensor with a 1x1 kernel and generate a
  7130. * //160x120x8 output tensor. Let both dimensions padding be 0 and their
  7131. * //stride be 1.
  7132. *
  7133. * #define IN_X 160
  7134. * #define IN_Y 120
  7135. * #define IN_CH 20
  7136. * #define OUT_CH 8
  7137. * #define KER_DIM_X 1
  7138. * #define KER_DIM_Y 1
  7139. * #define PAD_X 0
  7140. * #define PAD_Y 0
  7141. * #define STRIDE_X 1
  7142. * #define STRIDE_Y 1
  7143. * #define BIAS_LSHIFT 6 //Scale up the bias by 2^6
  7144. * #define OUT_RSHIFT 9 //Scale down the output tensor by 1/2^9
  7145. * #define OUT_X 160
  7146. * #define OUT_Y 120
  7147. *
  7148. * q7_t in_data[IN_CH * IN_X * IN_Y] = {...};
  7149. * q7_t weight[IN_CH * KER_DIM_X * KER_DIM_Y * OUT_CH] = {...};
  7150. * q7_t bias[OUT_CH] = {...};
  7151. * q15_t in_tmp_buf[2 * IN_CH * KER_DIM_X * KER_DIM_Y] = {0};
  7152. * q7_t out_data[OUT_CH * OUT_X * OUT_Y];
  7153. *
  7154. * riscv_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(in_data, IN_X, IN_Y ,
  7155. * IN_CH, weight, OUT_CH, KER_DIM_X, KER_DIM_Y, PAD_X, PAD_Y, STRIDE_X,
  7156. * STRIDE_Y, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_X, OUT_Y,
  7157. * in_tmp_buf, NULL);
  7158. * @endcode
  7159. */
  7160. static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor,
  7161. const uint16_t in_tensor_dim_x,
  7162. const uint16_t in_tensor_dim_y,
  7163. const uint16_t in_tensor_ch,
  7164. const q7_t *ker_weight,
  7165. const uint16_t out_tensor_ch,
  7166. const uint16_t ker_dim_x,
  7167. const uint16_t ker_dim_y,
  7168. const uint16_t pad_x,
  7169. const uint16_t pad_y,
  7170. const uint16_t stride_x,
  7171. const uint16_t stride_y,
  7172. const q7_t *bias,
  7173. const uint16_t bias_lshift,
  7174. const uint16_t out_rshift,
  7175. q7_t *out_tensor,
  7176. const uint16_t out_tensor_dim_x,
  7177. const uint16_t out_tensor_dim_y,
  7178. q15_t *in_tmp_buf,
  7179. q7_t *tmp_buf)
  7180. {
  7181. #if defined(__zcc__)
  7182. return tpt_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
  7183. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  7184. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  7185. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  7186. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  7187. #else
  7188. return riscv_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
  7189. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  7190. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  7191. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  7192. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  7193. #endif
  7194. }
  7195. /**
  7196. * @brief This function performs signed 8-bit integer convolution for
  7197. * RGB images with shift-based quantization on the outputs.
  7198. * @param[in] in_tensor pointer of the input tensor
  7199. * @param[in] in_tensor_dim input tensor dimension
  7200. * @param[in] ker_weight pointer of kernel weights
  7201. * @param[in] out_tensor_ch number of output tensor channels
  7202. * @param[in] ker_dim dimension of the filter kernel
  7203. * @param[in] pad padding size
  7204. * @param[in] stride convolution stride
  7205. * @param[in] bias pointer of the bias vector
  7206. * @param[in] bias_lshift left shift amount for the bias
  7207. * @param[in] out_rshift right shift amount for the output
  7208. * @param[out] out_tensor pointer of the output tensor
  7209. * @param[in] out_tensor_dim dimension of the output tensor
  7210. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  7211. * required when -mext-dsp or -mext-vector
  7212. * enabled and its size must be equal to "2 *
  7213. * (3 * ker_dim * ker_dim + 1)".
  7214. * @param[in] tmp_buf temporary buffer for kernel weights. It is
  7215. * required when -mext-vector enabled and its
  7216. * size must be "out_tensor_ch * (3 * ker_dim *
  7217. * ker_dim + 1)".
  7218. * @return This function only returns 0.
  7219. *
  7220. * @b Example:
  7221. * @code
  7222. * //Convolve a 28x28x3 input tensor with a 5x5 kernel and generate a 24x24x20
  7223. * //output tensor. Let both dimensions padding be 0 and their stride be 1.
  7224. *
  7225. * #define IN_DIM 28
  7226. * #define KER_DIM 5
  7227. * #define PAD 0
  7228. * #define STRIDE 1
  7229. * #define BIAS_LSHIFT 6
  7230. * #define OUT_RSHIFT 10
  7231. * #define OUT_CH 20
  7232. * #define OUT_DIM 24
  7233. *
  7234. * q7_t in_data[3 * IN_DIM * IN_DIM] = {...};
  7235. * q7_t weight[3 * KER_DIM * KER_DIM * OUT_CH] = {...};
  7236. * q7_t bias[OUT_CH] = {...};
  7237. * q15_t in_tmp_buf[2 * 3 * KER_DIM * KER_DIM] = {0};
  7238. * q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
  7239. *
  7240. * riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(in_data, IN_DIM, weight, OUT_CH,
  7241. * KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_DIM,
  7242. * in_tmp_buf, NULL);
  7243. * @endcode
  7244. */
  7245. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(const q7_t *in_tensor,
  7246. const uint16_t in_tensor_dim,
  7247. const q7_t *ker_weight,
  7248. const uint16_t out_tensor_ch,
  7249. const uint16_t ker_dim,
  7250. const uint16_t pad,
  7251. const uint16_t stride,
  7252. const q7_t *bias,
  7253. const uint16_t bias_lshift,
  7254. const uint16_t out_rshift,
  7255. q7_t *out_tensor,
  7256. const uint16_t out_tensor_dim,
  7257. q15_t *in_tmp_buf,
  7258. q7_t *tmp_buf)
  7259. {
  7260. #if defined(__zcc__)
  7261. return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
  7262. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  7263. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
  7264. tmp_buf);
  7265. #else
  7266. return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
  7267. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  7268. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
  7269. tmp_buf);
  7270. #endif
  7271. }
  7272. /**
  7273. * @brief This function performs fast signed 8-bit integer convolution
  7274. * for RGB images with shift-based quantization on the outputs.
  7275. * @param[in] in_tensor pointer of the input tensor
  7276. * @param[in] in_tensor_dim dimension of the input tensor
  7277. * @param[in] ker_weight pointer of kernel weights
  7278. * @param[in] out_tensor_ch number of output tensor channels
  7279. * @param[in] ker_dim dimension of the filter kernel
  7280. * @param[in] pad padding size
  7281. * @param[in] stride convolution stride
  7282. * @param[in] bias pointer of the bias vector
  7283. * @param[in] bias_lshift left shift amount for the bias
  7284. * @param[in] out_rshift right shift amount for the output
  7285. * @param[out] out_tensor pointer of the output tensor
  7286. * @param[in] out_tensor_dim dimension of the output tensor
  7287. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  7288. * required when -mext-dsp or -mext-vector
  7289. * is enabled and its size must be "2 * (3 *
  7290. * ker_dim * ker_dim + 1)".
  7291. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  7292. * required when -mext-dsp or -mext-vector
  7293. * is enabled and its size must be "out_tensor_ch *
  7294. * (3 * ker_dim * ker_dim + 1)".
  7295. * @return This function only returns 0.
  7296. *
  7297. * @b Example:
  7298. * @code
  7299. * //Convolve a 28x28x3 input tensor with a 5x5 kernel and generate a 24x24x20
  7300. * //output tensor. Let both dimensions padding be 0 and their stride be 1.
  7301. *
  7302. * #define IN_DIM 28
  7303. * #define KER_DIM 5
  7304. * #define PAD 0
  7305. * #define STRIDE 1
  7306. * #define BIAS_LSHIFT 6
  7307. * #define OUT_RSHIFT 10
  7308. * #define OUT_CH 20
  7309. * #define OUT_DIM 24
  7310. *
  7311. * q7_t in_data[3 * IN_DIM * IN_DIM] = {...};
  7312. * q7_t weight[3 * KER_DIM * KER_DIM * OUT_CH] = {...};
  7313. * q7_t bias[OUT_CH] = {...};
  7314. * q15_t in_tmp_buf[2 * (3 * KER_DIM * KER_DIM + 1)] = {0};
  7315. * q15_t wt_tmp_buf[OUT_CH * (3 * KER_DIM * KER_DIM + 1)];
  7316. * q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
  7317. *
  7318. * riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(in_data, IN_DIM, weight,
  7319. * OUT_CH, KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data,
  7320. * OUT_DIM, in_tmp_buf, wt_tmp_buf);
  7321. * @endcode
  7322. */
  7323. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(const q7_t *in_tensor,
  7324. const uint16_t in_tensor_dim,
  7325. const q7_t *ker_weight,
  7326. const uint16_t out_tensor_ch,
  7327. const uint16_t ker_dim,
  7328. const uint16_t pad,
  7329. const uint16_t stride,
  7330. const q7_t *bias,
  7331. const uint16_t bias_lshift,
  7332. const uint16_t out_rshift,
  7333. q7_t *out_tensor,
  7334. const uint16_t out_tensor_dim,
  7335. q15_t *in_tmp_buf,
  7336. q15_t *wt_tmp_buf)
  7337. {
  7338. #if defined(__zcc__)
  7339. return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
  7340. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  7341. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
  7342. wt_tmp_buf);
  7343. #else
  7344. return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
  7345. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  7346. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
  7347. wt_tmp_buf);
  7348. #endif
  7349. }
  7350. /**
  7351. * @brief This function performs signed 8-bit integer convolution with
  7352. * shift-based quantization on the outputs.
  7353. * @param[in] in_tensor pointer of the input tensor
  7354. * @param[in] in_tensor_dim dimension of the input tensor
  7355. * @param[in] in_tensor_ch number of input tensor channels
  7356. * @param[in] ker_weight pointer of kernel weights
  7357. * @param[in] out_tensor_ch number of output tensor channels
  7358. * @param[in] ker_dim dimension of the filter kernel
  7359. * @param[in] pad padding size
  7360. * @param[in] stride convolution stride
  7361. * @param[in] bias pointer of the bias vector
  7362. * @param[in] bias_lshift left shift amount for the bias
  7363. * @param[in] out_rshift right shift amount for the output
  7364. * @param[out] out_tensor pointer of the output tensor
  7365. * @param[in] out_tensor_dim dimension of the output tensor
  7366. * @param[in] in_tmp_buf temporary buffer for input tensor. It is
  7367. * required when -mext-dsp or -mext-vector is
  7368. * enabled and its size must be equal to "2 *
  7369. * in_tensor_ch * ker_dim * ker_dim".
  7370. * @param[in] tmp_buf dummy
  7371. * @return This function only returns 0.
  7372. *
  7373. * @b Example:
  7374. * @code
  7375. * //Convolve a 28x28x1 input tensor with a 5x5 kernel and generate a 24x24x20
  7376. * //output tensor. Let both dimensions padding be 0 and their stride be 1.
  7377. *
  7378. * #define IN_DIM 28
  7379. * #define IN_CH 1
  7380. * #define KER_DIM 5
  7381. * #define PAD 0
  7382. * #define STRIDE 1
  7383. * #define BIAS_LSHIFT 6
  7384. * #define OUT_RSHIFT 10
  7385. * #define OUT_CH 20
  7386. * #define OUT_DIM 24
  7387. *
  7388. * q7_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
  7389. * q7_t weight[IN_CH * KER_DIM * KER_DIM * OUT_CH] = {...};
  7390. * q7_t bias[OUT_CH] = {...};
  7391. * q15_t in_tmp_buf[2 * IN_CH * KER_DIM * KER_DIM] = {0};
  7392. * q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
  7393. *
  7394. * riscv_nn_conv_HWC_s8_s8_s8_sft_bias(in_data, IN_DIM, IN_CH, weight, OUT_CH,
  7395. * KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_DIM,
  7396. * in_tmp_buf, NULL);
  7397. * @endcode
  7398. */
  7399. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor,
  7400. const uint16_t in_tensor_dim,
  7401. const uint16_t in_tensor_ch,
  7402. const q7_t *ker_weight,
  7403. const uint16_t out_tensor_ch,
  7404. const uint16_t ker_dim,
  7405. const uint16_t pad,
  7406. const uint16_t stride,
  7407. const q7_t *bias,
  7408. const uint16_t bias_lshift,
  7409. const uint16_t out_rshift,
  7410. q7_t *out_tensor,
  7411. const uint16_t out_tensor_dim,
  7412. q15_t *in_tmp_buf,
  7413. q7_t *tmp_buf)
  7414. {
  7415. #if defined(__zcc__)
  7416. return tpt_nn_conv_HWC_s8_s8_s8_sft_bias(
  7417. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  7418. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  7419. out_tensor_dim, in_tmp_buf, tmp_buf);
  7420. #else
  7421. return riscv_nn_conv_HWC_s8_s8_s8_sft_bias(
  7422. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  7423. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  7424. out_tensor_dim, in_tmp_buf, tmp_buf);
  7425. #endif
  7426. }
  7427. /**
  7428. * @brief This function performs signed 8-bit integer convolution in
  7429. * any x and y dimensions with shift-based quantization on the
  7430. * outputs.
  7431. * @param[in] in_tensor pointer of the input tensor
  7432. * @param[in] in_tensor_dim_x x dimension of the input tensor
  7433. * @param[in] in_tensor_dim_y y dimension of the input tensor
  7434. * @param[in] in_tensor_ch number of input tensor channels
  7435. * @param[in] ker_weight pointer of kernel weights
  7436. * @param[in] out_tensor_ch number of output tensor channels
  7437. * @param[in] ker_dim_x x dimension of the filter kernel
  7438. * @param[in] ker_dim_y y dimension of the filter kernel
  7439. * @param[in] pad_x padding size in the x dimension
  7440. * @param[in] pad_y padding size in the y dimension
  7441. * @param[in] stride_x convolution stride in the x dimension
  7442. * @param[in] stride_y convolution stride in the y dimension
  7443. * @param[in] bias pointer of the bias vector
  7444. * @param[in] bias_lshift left shift amount for the bias
  7445. * @param[in] out_rshift right shift amount for the output
  7446. * @param[out] out_tensor pointer of the output tensor
  7447. * @param[in] out_tensor_dim_x x dimension of the output tensor
  7448. * @param[in] out_tensor_dim_y y dimension of the output tensor
  7449. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  7450. * It is required when -mext-dsp or
  7451. * -mext-vector is enabled and its size
  7452. * must be equal to "2 * in_tensor_ch *
  7453. * ker_dim_x * ker_dim_y".
  7454. * @param[in] tmp_buf dummy
  7455. *
  7456. * @b Example:
  7457. * @code
  7458. * //Convolve a 160x120x3 input tensor with a 3x5 kernel and generate a 80x59x5
  7459. * //output tensor. Let both dimensions padding be 1 and their stride be 2.
  7460. *
  7461. * #define IN_X 160
  7462. * #define IN_Y 120
  7463. * #define IN_CH 3
  7464. * #define OUT_CH 5
  7465. * #define KER_DIM_X 3
  7466. * #define KER_DIM_Y 5
  7467. * #define PAD_X 1
  7468. * #define PAD_Y 1
  7469. * #define STRIDE_X 2
  7470. * #define STRIDE_Y 2
  7471. * #define BIAS_LSHIFT 6
  7472. * #define OUT_RSHIFT 9
  7473. * #define OUT_X 80
  7474. * #define OUT_Y 59
  7475. *
  7476. * q7_t in_data[IN_CH * IN_X * IN_Y] = {...};
  7477. * q7_t weight[IN_CH * KER_DIM_X * KER_DIM_Y * OUT_CH] = {...};
  7478. * q7_t bias[OUT_CH] = {...};
  7479. * q15_t in_tmp_buf[2 * IN_CH * KER_DIM_X * KER_DIM_Y] = {0};
  7480. * q7_t out_data[OUT_CH * OUT_X * OUT_Y];
  7481. *
  7482. * riscv_nn_conv_HWC_s8_s8_s8_sft_bias_any(in_data, IN_X, IN_Y , IN_CH, weight,
  7483. * OUT_CH, KER_DIM_X, KER_DIM_Y, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y, bias,
  7484. * BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_X, OUT_Y, in_tmp_buf, NULL);
  7485. * @endcode
  7486. */
  7487. static inline void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor,
  7488. const uint16_t in_tensor_dim_x,
  7489. const uint16_t in_tensor_dim_y,
  7490. const uint16_t in_tensor_ch,
  7491. const q7_t *ker_weight,
  7492. const uint16_t out_tensor_ch,
  7493. const uint16_t ker_dim_x,
  7494. const uint16_t ker_dim_y,
  7495. const uint16_t pad_x,
  7496. const uint16_t pad_y,
  7497. const uint16_t stride_x,
  7498. const uint16_t stride_y,
  7499. const q7_t *bias,
  7500. const uint16_t bias_lshift,
  7501. const uint16_t out_rshift,
  7502. q7_t *out_tensor,
  7503. const uint16_t out_tensor_dim_x,
  7504. const uint16_t out_tensor_dim_y,
  7505. q15_t *in_tmp_buf,
  7506. q7_t *tmp_buf)
  7507. {
  7508. #if defined(__zcc__)
  7509. tpt_nn_conv_HWC_s8_s8_s8_sft_bias_any(
  7510. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  7511. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  7512. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  7513. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  7514. #else
  7515. riscv_nn_conv_HWC_s8_s8_s8_sft_bias_any(
  7516. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  7517. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  7518. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  7519. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  7520. #endif
  7521. }
  7522. /**
  7523. * @brief This function performs fast signed 8-bit integer convolution
  7524. * with shift-based quantization on the outputs.
  7525. * @param[in] in_tensor pointer of the input tensor
  7526. * @param[in] in_tensor_dim dimension of the input tensor
  7527. * @param[in] in_tensor_ch number of input tensor channels
  7528. * @param[in] ker_weight pointer of kernel weights
  7529. * @param[in] out_tensor_ch number of output tensor channels
  7530. * @param[in] ker_dim dimension of the filter kernel
  7531. * @param[in] pad padding size
  7532. * @param[in] stride convolution stride
  7533. * @param[in] bias pointer of the bias vector
  7534. * @param[in] bias_lshift left shift amount for the bias
  7535. * @param[in] out_rshift right shift amount for the output
  7536. * @param[out] out_tensor pointer of the output tensor
  7537. * @param[in] out_tensor_dim dimension of the output tensor
  7538. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  7539. * required when -mext-dsp or -mext-vector
  7540. * is enabled and its size must be equal to "2 *
  7541. * in_tensor_ch * ker_dim * ker_dim".
  7542. * @param[in] tmp_buf dummy
  7543. * @return This function returns 0 on success; otherwise, it returns -1
  7544. * if its inputs do not meet the constraints that in_tensor_ch
  7545. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  7546. *
  7547. * @b Example:
  7548. * @code
  7549. * //Convolve a 12x12x20 input tensor with a 5x5 kernel and generate a 8x8x50
  7550. * //output tensor. Let both dimensions padding be 0 and their stride be 1.
  7551. *
  7552. * #define IN_DIM 12
  7553. * #define IN_CH 20
  7554. * #define KER_DIM 5
  7555. * #define PAD 0
  7556. * #define STRIDE 1
  7557. * #define BIAS_LSHIFT 6
  7558. * #define OUT_RSHIFT 10
  7559. * #define OUT_CH 50
  7560. * #define OUT_DIM 8
  7561. *
  7562. * q7_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
  7563. * q7_t weight[IN_CH * KER_DIM * KER_DIM * OUT_CH] = {...};
  7564. * q7_t bias[OUT_CH] = {...};
  7565. * q15_t in_tmp_buf[2 * IN_CH * KER_DIM * KER_DIM] = {0};
  7566. * q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
  7567. *
  7568. * riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast(in_data, IN_DIM, IN_CH, weight,
  7569. * OUT_CH, KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data,
  7570. * OUT_DIM, in_tmp_buf, NULL);
  7571. * @endcode
  7572. */
  7573. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast(const q7_t *in_tensor,
  7574. const uint16_t in_tensor_dim,
  7575. const uint16_t in_tensor_ch,
  7576. const q7_t *ker_weight,
  7577. const uint16_t out_tensor_ch,
  7578. const uint16_t ker_dim,
  7579. const uint16_t pad,
  7580. const uint16_t stride,
  7581. const q7_t *bias,
  7582. const uint16_t bias_lshift,
  7583. const uint16_t out_rshift,
  7584. q7_t *out_tensor,
  7585. const uint16_t out_tensor_dim,
  7586. q15_t *in_tmp_buf,
  7587. q7_t *tmp_buf)
  7588. {
  7589. #if defined(__zcc__)
  7590. return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
  7591. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  7592. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  7593. out_tensor_dim, in_tmp_buf, tmp_buf);
  7594. #else
  7595. return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
  7596. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  7597. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  7598. out_tensor_dim, in_tmp_buf, tmp_buf);
  7599. #endif
  7600. }
  7601. /**
  7602. * @brief This function performs fast signed 8-bit integer convolution
  7603. * in any x and y dimensions with shift-based quantization on
  7604. * the outputs.
  7605. * @param[in] in_tensor pointer of the input tensor
  7606. * @param[in] in_tensor_dim_x x dimension of the input tensor
  7607. * @param[in] in_tensor_dim_y y dimension of the input tensor
  7608. * @param[in] in_tensor_ch number of input tensor channels
  7609. * @param[in] ker_weight pointer of kernel weights
  7610. * @param[in] out_tensor_ch number of output tensor channels
  7611. * @param[in] ker_dim_x x dimension of the filter kernel
  7612. * @param[in] ker_dim_y y dimension of the filter kernel
  7613. * @param[in] pad_x padding size in the x dimension
  7614. * @param[in] pad_y padding size in the y dimension
  7615. * @param[in] stride_x convolution stride in the x dimension
  7616. * @param[in] stride_y convolution stride in the y dimension
  7617. * @param[in] bias pointer of the bias vector
  7618. * @param[in] bias_lshift left shift amount for the bias
  7619. * @param[in] out_rshift right shift amount for the output
  7620. * @param[out] out_tensor pointer of the output tensor
  7621. * @param[in] out_tensor_dim_x x dimension of the output tensor
  7622. * @param[in] out_tensor_dim_y y dimension of the output tensor
  7623. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  7624. * It is required when -mext-dsp or
  7625. * -mext-vector is enabled and its size must
  7626. * be equal to "2 * in_tensor_ch * ker_dim_x
  7627. * * ker_dim_y".
  7628. * @param[in] tmp_buf dummy
  7629. * @return This function returns 0 on success; otherwise, it returns -1
  7630. * if its inputs do not meet the constraints that in_tensor_ch
  7631. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  7632. *
  7633. * @b Example:
  7634. * @code
  7635. * //Convolve a 160x120x20 input tensor with a 3x5 kernel and generate a
  7636. * //80x59x8 output tensor. Let both dimensions padding be 1 and their stride
  7637. * //be 2.
  7638. *
  7639. * #define IN_X 160
  7640. * #define IN_Y 120
  7641. * #define IN_CH 20
  7642. * #define OUT_CH 8
  7643. * #define KER_DIM_X 3
  7644. * #define KER_DIM_Y 5
  7645. * #define PAD_X 1
  7646. * #define PAD_Y 1
  7647. * #define STRIDE_X 2
  7648. * #define STRIDE_Y 2
  7649. * #define BIAS_LSHIFT 6
  7650. * #define OUT_RSHIFT 9
  7651. * #define OUT_X 80
  7652. * #define OUT_Y 59
  7653. *
  7654. * q7_t in_data[IN_CH * IN_X * IN_Y] = {...};
  7655. * q7_t weight[IN_CH * KER_DIM_X * KER_DIM_Y * OUT_CH] = {...};
  7656. * q7_t bias[OUT_CH] = {...};
  7657. * q15_t in_tmp_buf[2 * IN_CH * KER_DIM_X * KER_DIM_Y] = {0};
  7658. * q7_t out_data[OUT_CH * OUT_Y * OUT_X];
  7659. *
  7660. * riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(in_data, IN_X, IN_Y , IN_CH,
  7661. * weight, OUT_CH, KER_DIM_X, KER_DIM_Y, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y,
  7662. * bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_X, OUT_Y, in_tmp_buf,
  7663. * NULL);
  7664. * @endcode
  7665. */
  7666. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor,
  7667. const uint16_t in_tensor_dim_x,
  7668. const uint16_t in_tensor_dim_y,
  7669. const uint16_t in_tensor_ch,
  7670. const q7_t *ker_weight,
  7671. const uint16_t out_tensor_ch,
  7672. const uint16_t ker_dim_x,
  7673. const uint16_t ker_dim_y,
  7674. const uint16_t pad_x,
  7675. const uint16_t pad_y,
  7676. const uint16_t stride_x,
  7677. const uint16_t stride_y,
  7678. const q7_t *bias,
  7679. const uint16_t bias_lshift,
  7680. const uint16_t out_rshift,
  7681. q7_t *out_tensor,
  7682. const uint16_t out_tensor_dim_x,
  7683. const uint16_t out_tensor_dim_y,
  7684. q15_t *in_tmp_buf,
  7685. q7_t *tmp_buf)
  7686. {
  7687. #if defined(__zcc__)
  7688. return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
  7689. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  7690. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  7691. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  7692. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  7693. #else
  7694. return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
  7695. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  7696. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  7697. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  7698. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  7699. #endif
  7700. }
  7701. /**
  7702. * @brief This function performs signed 16-bit integer convolution
  7703. * with shift-based quantization on the outputs.
  7704. * @param[in] in_tensor pointer of the input tensor
  7705. * @param[in] in_tensor_dim dimension of the input tensor
  7706. * @param[in] in_tensor_ch number of input tensor channels
  7707. * @param[in] ker_weight pointer of kernel weights
  7708. * @param[in] out_tensor_ch number of output tensor channels
  7709. * @param[in] ker_dim dimension of the filter kernel
  7710. * @param[in] pad padding size
  7711. * @param[in] stride convolution stride
  7712. * @param[in] bias pointer of the bias vector
  7713. * @param[in] bias_lshift left shift amount for the bias
  7714. * @param[in] out_rshift right shift amount for the output
  7715. * @param[out] out_tensor pointer of the output tensor
  7716. * @param[in] out_tensor_dim dimension of the output tensor
  7717. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  7718. * required when -mext-dsp or -mext-vector is
  7719. * enabled and its size must be equal to
  7720. * "in_tensor_ch * ker_dim * ker_dim".
  7721. * @param[in] tmp_buf dummy
  7722. * @return This function only returns 0.
  7723. *
  7724. * @b Example:
  7725. * @code
  7726. * //Convolve a 28x28x1 input tensor with a 5x5 kernel and generate a 24x24x20
  7727. * //output tensor. Let both dimensions padding be 0 and their stride be 1.
  7728. *
  7729. * #define IN_DIM 28
  7730. * #define IN_CH 1
  7731. * #define KER_DIM 5
  7732. * #define PAD 0
  7733. * #define STRIDE 1
  7734. * #define BIAS_LSHIFT 6
  7735. * #define OUT_RSHIFT 10
  7736. * #define OUT_CH 20
  7737. * #define OUT_DIM 24
  7738. *
  7739. * q15_t input_data[IN_CH * IN_DIM * IN_DIM] = {...};
  7740. * q15_t weight[IN_CH * KER_DIM * KER_DIM * OUT_CH] = {...};
  7741. * q15_t bias[OUT_CH] = {...};
  7742. * q15_t in_tmp_buf[IN_CH * KER_DIM * KER_DIM] = {0};
  7743. * q15_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
  7744. *
  7745. * riscv_nn_conv_HWC_s16_s16_s16_sft_bias(input_data, IN_DIM, IN_CH, weight,
  7746. * OUT_CH, KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data,
  7747. * OUT_DIM, in_tmp_buf, NULL);
  7748. * @endcode
  7749. */
  7750. static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias(const q15_t *in_tensor,
  7751. const uint16_t in_tensor_dim,
  7752. const uint16_t in_tensor_ch,
  7753. const q15_t *ker_weight,
  7754. const uint16_t out_tensor_ch,
  7755. const uint16_t ker_dim,
  7756. const uint16_t pad,
  7757. const uint16_t stride,
  7758. const q15_t *bias,
  7759. const uint16_t bias_lshift,
  7760. const uint16_t out_rshift,
  7761. q15_t *out_tensor,
  7762. const uint16_t out_tensor_dim,
  7763. q15_t *in_tmp_buf,
  7764. q7_t *tmp_buf)
  7765. {
  7766. #if defined(__zcc__)
  7767. return tpt_nn_conv_HWC_s16_s16_s16_sft_bias(
  7768. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  7769. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  7770. out_tensor_dim, in_tmp_buf, tmp_buf);
  7771. #else
  7772. return riscv_nn_conv_HWC_s16_s16_s16_sft_bias(
  7773. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  7774. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  7775. out_tensor_dim, in_tmp_buf, tmp_buf);
  7776. #endif
  7777. }
  7778. /**
  7779. * @brief This function performs fast signed 16-bit integer
  7780. * convolution with shift-based quantization on the outputs.
  7781. * @param[in] in_tensor pointer of the input tensor
  7782. * @param[in] in_tensor_dim dimension of the input tensor
  7783. * @param[in] in_tensor_ch number of input tensor channels
  7784. * @param[in] ker_weight pointer of kernel weights
  7785. * @param[in] out_tensor_ch number of output tensor channels
  7786. * @param[in] ker_dim dimension of the filter kernel
  7787. * @param[in] pad padding size
  7788. * @param[in] stride convolution stride
  7789. * @param[in] bias pointer of the bias vector
  7790. * @param[in] bias_lshift left shift amount for the bias
  7791. * @param[in] out_rshift right shift amount for the output
  7792. * @param[out] out_tensor pointer of the output tensor
  7793. * @param[in] out_tensor_dim dimension of the output tensor
  7794. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  7795. * required when -mext-dsp or -mext-vector is
  7796. * enabled and its size must be equal to "2 *
  7797. * in_tensor_ch * ker_dim * ker_dim".
  7798. * @param[in] tmp_buf dummy
  7799. * @return This function returns 0 on success; otherwise, it returns -1
  7800. * if its inputs do not meet the constraints that both
  7801. * in_tensor_ch and out_tensor_ch are multiples of 2.
  7802. *
  7803. * @b Example:
  7804. * @code
  7805. * //Convolve a 28x28x4 input tensor with a 5x5 kernel and generate a 24x24x8
  7806. * //output tensor. Let both dimensions padding be 0 and their stride be 1.
  7807. *
  7808. * #define IN_DIM 28
  7809. * #define IN_CH 4
  7810. * #define KER_DIM 5
  7811. * #define PAD 0
  7812. * #define STRIDE 1
  7813. * #define BIAS_LSHIFT 6
  7814. * #define OUT_RSHIFT 10
  7815. * #define OUT_CH 8
  7816. * #define OUT_DIM 24
  7817. *
  7818. * q15_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
  7819. * q15_t weight[IN_CH * KER_DIM * KER_DIM * OUT_CH] = {...};
  7820. * q15_t bias[OUT_CH] = {...};
  7821. * q15_t in_tmp_buf[2 * IN_CH * KER_DIM * KER_DIM] = {0};
  7822. * q15_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
  7823. *
  7824. * riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast(in_data, IN_DIM, IN_CH, weight,
  7825. * OUT_CH, KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data,
  7826. * OUT_DIM, in_tmp_buf, NULL);
  7827. * @endcode
  7828. */
  7829. static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast(const q15_t *in_tensor,
  7830. const uint16_t in_tensor_dim,
  7831. const uint16_t in_tensor_ch,
  7832. const q15_t *ker_weight,
  7833. const uint16_t out_tensor_ch,
  7834. const uint16_t ker_dim,
  7835. const uint16_t pad,
  7836. const uint16_t stride,
  7837. const q15_t *bias,
  7838. const uint16_t bias_lshift,
  7839. const uint16_t out_rshift,
  7840. q15_t *out_tensor,
  7841. const uint16_t out_tensor_dim,
  7842. q15_t *in_tmp_buf,
  7843. q7_t *tmp_buf)
  7844. {
  7845. #if defined(__zcc__)
  7846. return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
  7847. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  7848. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  7849. out_tensor_dim, in_tmp_buf, tmp_buf);
  7850. #else
  7851. return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
  7852. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  7853. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  7854. out_tensor_dim, in_tmp_buf, tmp_buf);
  7855. #endif
  7856. }
  7857. /**
  7858. * @brief This function performs fast signed 16-bit integer
  7859. * convolution in any x and y dimensions with shift-based
  7860. * quantization on the outputs.
  7861. * @param[in] in_tensor pointer of the input tensor
  7862. * @param[in] in_tensor_dim_x x dimension of the input tensor
  7863. * @param[in] in_tensor_dim_y y dimension of the input tensor
  7864. * @param[in] in_tensor_ch number of input tensor channels
  7865. * @param[in] ker_weight pointer of kernel weights
  7866. * @param[in] out_tensor_ch number of output tensor channels
  7867. * @param[in] ker_dim_x x dimension of the filter kernel
  7868. * @param[in] ker_dim_y y dimension of the filter kernel
  7869. * @param[in] pad_x padding size in the x dimension
  7870. * @param[in] pad_y padding size in the y dimension
  7871. * @param[in] stride_x convolution stride in the x dimension
  7872. * @param[in] stride_y convolution stride in the y dimension
  7873. * @param[in] bias pointer of the bias vector
  7874. * @param[in] bias_lshift left shift amount for the bias
  7875. * @param[in] out_rshift right shift amount for the output
  7876. * @param[out] out_tensor pointer of the output tensor
  7877. * @param[in] out_tensor_dim_x x dimension of the output tensor
  7878. * @param[in] out_tensor_dim_y y dimension of the output tensor
  7879. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  7880. * It is required when -mext-dsp or
  7881. * -mext-vector is enabled and its size
  7882. * must be equal to "2 * in_tensor_ch *
  7883. * ker_dim_x * ker_dim_y".
  7884. * @param[in] tmp_buf dummy
  7885. * @return This function returns 0 on success; otherwise, it returns -1
  7886. * if its inputs do not meet the constraints that both
  7887. * in_tensor_ch and out_tensor_ch are multiples of 2.
  7888. *
  7889. * @b Example:
  7890. * @code
  7891. * //Convolve a 160x120x20 input tensor with a 3x5 kernel and generate a
  7892. * //80x59x8 output tensor. Let both dimensions padding be 1 and their stride
  7893. * //be 2.
  7894. *
  7895. * #define IN_X 160
  7896. * #define IN_Y 120
  7897. * #define IN_CH 20
  7898. * #define OUT_CH 8
  7899. * #define KER_DIM_X 3
  7900. * #define KER_DIM_Y 5
  7901. * #define PAD_X 1
  7902. * #define PAD_Y 1
  7903. * #define STRIDE_X 2
  7904. * #define STRIDE_Y 2
  7905. * #define BIAS_LSHIFT 6
  7906. * #define OUT_RSHIFT 9
  7907. * #define OUT_X 80
  7908. * #define OUT_Y 59
  7909. *
  7910. * q15_t in_data[IN_CH * IN_X * IN_Y] = {...};
  7911. * q15_t weight[IN_CH * KER_DIM_X * KER_DIM_Y * OUT_CH] = {...};
  7912. * q15_t bias[OUT_CH] = {...};
  7913. * q15_t in_tmp_buf[2 * IN_CH * KER_DIM_X * KER_DIM_Y] = {0};
  7914. * q15_t out_data[OUT_CH * OUT_X * OUT_Y];
  7915. *
  7916. * riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(in_data, IN_X, IN_Y , IN_CH,
  7917. * weight, OUT_CH, KER_DIM_X, KER_DIM_Y, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y,
  7918. * bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_X, OUT_Y, in_tmp_buf,
  7919. * NULL);
  7920. * @endcode
  7921. */
  7922. static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(const q15_t *in_tensor,
  7923. const uint16_t in_tensor_dim_x,
  7924. const uint16_t in_tensor_dim_y,
  7925. const uint16_t in_tensor_ch,
  7926. const q15_t *ker_weight,
  7927. const uint16_t out_tensor_ch,
  7928. const uint16_t ker_dim_x,
  7929. const uint16_t ker_dim_y,
  7930. const uint16_t pad_x,
  7931. const uint16_t pad_y,
  7932. const uint16_t stride_x,
  7933. const uint16_t stride_y,
  7934. const q15_t *bias,
  7935. const uint16_t bias_lshift,
  7936. const uint16_t out_rshift,
  7937. q15_t *out_tensor,
  7938. const uint16_t out_tensor_dim_x,
  7939. const uint16_t out_tensor_dim_y,
  7940. q15_t *in_tmp_buf,
  7941. q7_t *tmp_buf)
  7942. {
  7943. #if defined(__zcc__)
  7944. return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
  7945. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  7946. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  7947. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  7948. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  7949. #else
  7950. return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
  7951. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  7952. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  7953. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  7954. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  7955. #endif
  7956. }
  7957. /**
  7958. * @brief This function performs signed 8-bit integer depthwise
  7959. * convolution with shift-based quantization on the outputs.
  7960. * @param[in] in_tensor pointer of the input tensor
  7961. * @param[in] in_tensor_dim dimension of the input tensor
  7962. * @param[in] in_tensor_ch number of input tensor channels
  7963. * @param[in] ker_weight pointer of kernel weights
  7964. * @param[in] out_tensor_ch number of output tensor channels
  7965. * @param[in] ker_dim dimension of the filter kernel
  7966. * @param[in] pad padding size
  7967. * @param[in] stride convolution stride
  7968. * @param[in] bias pointer of the bias vector
  7969. * @param[in] bias_lshift left shift amount for the bias
  7970. * @param[in] out_rshift right shift amount for the output
  7971. * @param[out] out_tensor pointer of the output tensor
  7972. * @param[in] out_tensor_dim dimension of the output tensor
  7973. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  7974. * required when -mext-dsp or -mext-vector is
  7975. * enabled and its size must be equal to
  7976. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  7977. * @param[in] tmp_buf dummy
  7978. * @return This function returns 0 on success; otherwise, it returns -1
  7979. * if its inputs do not meet the constraints that in_tensor_ch
  7980. * has to be equal to out_tensor_ch.
  7981. *
  7982. * @b Example:
  7983. * @code
  7984. * //Convolve a 11x11x28 input tensor with a 3x3 kernel and generate a 9x9x48
  7985. * //output tensor. Let both dimensions padding be 0 and their stride be 1.
  7986. *
  7987. * #define IN_DIM 11
  7988. * #define IN_CH 28
  7989. * #define OUT_CH 48
  7990. * #define KER_DIM 3
  7991. * #define PAD 0
  7992. * #define STRIDE 1
  7993. * #define OUT_RSHIFT 7
  7994. * #define OUT_DIM 9
  7995. *
  7996. * q7_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
  7997. * q7_t weight[IN_CH * KER_DIM * KER_DIM * IN_CH] = {...};
  7998. * q7_t bias[IN_CH] = {...};
  7999. * q15_t in_tmp_buf[2 * OUT_CH * KER_DIM * KER_DIM] = {0};
  8000. * q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
  8001. *
  8002. * riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias(in_data, IN_DIM, IN_CH, weight,
  8003. * OUT_CH, KER_DIM, PAD, STRIDE, bias, 0, OUT_RSHIFT, out_data, OUT_DIM,
  8004. * in_tmp_buf, NULL);
  8005. * @endcode
  8006. */
  8007. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor,
  8008. const uint16_t in_tensor_dim,
  8009. const uint16_t in_tensor_ch,
  8010. const q7_t *ker_weight,
  8011. const uint16_t out_tensor_ch,
  8012. const uint16_t ker_dim,
  8013. const uint16_t pad,
  8014. const uint16_t stride,
  8015. const q7_t *bias,
  8016. const uint16_t bias_lshift,
  8017. const uint16_t out_rshift,
  8018. q7_t *out_tensor,
  8019. const uint16_t out_tensor_dim,
  8020. q15_t *in_tmp_buf,
  8021. q7_t *tmp_buf)
  8022. {
  8023. #if defined(__zcc__)
  8024. return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
  8025. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  8026. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  8027. out_tensor_dim, in_tmp_buf, tmp_buf);
  8028. #else
  8029. return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
  8030. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  8031. ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
  8032. out_tensor_dim, in_tmp_buf, tmp_buf);
  8033. #endif
  8034. }
  8035. /**
  8036. * @brief This function performs signed 8-bit integer depthwise
  8037. * convolution in any x and y dimensions with shift-based
  8038. * quantization on the outputs.
  8039. * @param[in] in_tensor pointer of the input tensor
  8040. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8041. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8042. * @param[in] in_tensor_ch number of input tensor channels
  8043. * @param[in] ker_weight pointer of kernel weights
  8044. * @param[in] out_tensor_ch number of output tensor channels
  8045. * @param[in] ker_dim_x x dimension of the filter kernel
  8046. * @param[in] ker_dim_y y dimension of the filter kernel
  8047. * @param[in] pad_x padding size in the x dimension
  8048. * @param[in] pad_y padding size in the y dimension
  8049. * @param[in] stride_x convolution stride in the x dimension
  8050. * @param[in] stride_y convolution stride in the y dimension
  8051. * @param[in] bias pointer of the bias vector
  8052. * @param[in] bias_lshift left shift amount for the bias
  8053. * @param[in] out_rshift right shift amount for the output
  8054. * @param[out] out_tensor pointer of the output tensor
  8055. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8056. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8057. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8058. * It is required when -mext-dsp or
  8059. * -mext-vector is enabled and its size
  8060. * must be equal to "(in_tensor_ch *
  8061. * ker_dim_x * ker_dim_y + 1) / 2".
  8062. * @param[in] tmp_buf dummy
  8063. * @return This function returns 0 on success; otherwise, it returns -1
  8064. * if its inputs do not meet the constraints that in_tensor_ch
  8065. * must be equal to out_tensor_ch.
  8066. *
  8067. * @b Example:
  8068. * @code
  8069. * //Perform a depth-wise convolution for a 79x59x12 input tensor with a 3x3
  8070. * //kernel and generate a 77x57x12 output tensor. Let both dimensions padding
  8071. * //be 0 and their stride be 1.
  8072. *
  8073. * #define IN_DIM_X 79
  8074. * #define IN_DIM_Y 59
  8075. * #define IN_CH 12
  8076. * #define OUT_CH 12
  8077. * #define KER_DIM 3
  8078. * #define PAD 0
  8079. * #define STRIDE 1
  8080. * #define BIAS_SHIFT 0
  8081. * #define OUT_RSHIFT 7
  8082. * #define OUT_DIM_X 77
  8083. * #define OUT_DIM_Y 57
  8084. *
  8085. * q7_t in_data[IN_CH * IN_DIM_X * IN_DIM_Y] = {...};
  8086. * q7_t weight[IN_CH * KER_DIM * KER_DIM * IN_CH] = {...};
  8087. * q7_t bias[IN_CH] = {...};
  8088. * q15_t in_tmp_buf[2 * OUT_CH * KER_DIM * KER_DIM] = {0};
  8089. * q7_t out_data[OUT_CH * OUT_DIM_X * OUT_DIM_Y];
  8090. *
  8091. * riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(in_data, IN_DIM_X, IN_DIM_Y,
  8092. * IN_CH, weight, OUT_CH, KER_DIM, KER_DIM, PAD, PAD, STRIDE, STRIDE, bias,
  8093. * BIAS_SHIFT, OUT_RSHIFT, out_data, OUT_DIM_X, OUT_DIM_Y, in_tmp_buf,
  8094. * NULL);
  8095. * @endcode
  8096. */
  8097. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor,
  8098. const uint16_t in_tensor_dim_x,
  8099. const uint16_t in_tensor_dim_y,
  8100. const uint16_t in_tensor_ch,
  8101. const q7_t *ker_weight,
  8102. const uint16_t out_tensor_ch,
  8103. const uint16_t ker_dim_x,
  8104. const uint16_t ker_dim_y,
  8105. const uint16_t pad_x,
  8106. const uint16_t pad_y,
  8107. const uint16_t stride_x,
  8108. const uint16_t stride_y,
  8109. const q7_t *bias,
  8110. const uint16_t bias_lshift,
  8111. const uint16_t out_rshift,
  8112. q7_t *out_tensor,
  8113. const uint16_t out_tensor_dim_x,
  8114. const uint16_t out_tensor_dim_y,
  8115. q15_t *in_tmp_buf,
  8116. q7_t *tmp_buf)
  8117. {
  8118. #if defined(__zcc__)
  8119. return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
  8120. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8121. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8122. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  8123. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  8124. #else
  8125. return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
  8126. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8127. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8128. bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
  8129. out_tensor_dim_y, in_tmp_buf, tmp_buf);
  8130. #endif
  8131. }
  8132. /**
  8133. * @brief This function performs 1x1 kernels convolution for signed
  8134. * 8-bit integer inputs/outputs in any x and y dimensions with
  8135. * bias inputs and symmetric quantization on the outputs..
  8136. * @param[in] in_tensor pointer of the input tensor
  8137. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8138. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8139. * @param[in] in_tensor_ch number of input tensor channels
  8140. * @param[in] ker_weight pointer of kernel weights
  8141. * @param[in] out_tensor_ch number of output tensor channels
  8142. * @param[in] ker_dim_x x dimension of the filter kernel
  8143. * @param[in] ker_dim_y y dimension of the filter kernel
  8144. * @param[in] pad_x padding size in the x dimension
  8145. * @param[in] pad_y padding size in the y dimension
  8146. * @param[in] stride_x convolution stride in the x dimension
  8147. * @param[in] stride_y convolution stride in the y dimension
  8148. * @param[in] bias pointer of the bias vector
  8149. * @param[in] pre_rshift right shift amount for the output
  8150. * @param[in] out_scale value of scaling for the output
  8151. * @param[in] post_rshift right shift amount for the output
  8152. * @param[out] out_tensor pointer of the output tensor
  8153. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8154. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8155. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8156. * It is required when -mext-dsp or
  8157. * -mext-vector is enabled and its size
  8158. * must be equal to "2 * in_tensor_ch *
  8159. * ker_dim_x * ker_dim_y".
  8160. * @return This function returns 0 on success; otherwise, it returns -1
  8161. * if its inputs do not meet the constraints (see the Note
  8162. * below for details).
  8163. *
  8164. * @note
  8165. * - The input constraints of this function are:
  8166. * - in_tensor_ch is a multiple of 4
  8167. * - out_tensor_ch is a multiple of 2
  8168. * - ker_dim_x is 1
  8169. * - ker_dim_y is 1
  8170. * - pad_x is 0
  8171. * - pad_y is 0
  8172. * - stride_x is 1
  8173. * - stride_y is 1
  8174. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8175. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  8176. */
  8177. static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor,
  8178. const uint16_t in_tensor_dim_x,
  8179. const uint16_t in_tensor_dim_y,
  8180. const uint16_t in_tensor_ch,
  8181. const q7_t *ker_weight,
  8182. const uint16_t out_tensor_ch,
  8183. const uint16_t ker_dim_x,
  8184. const uint16_t ker_dim_y,
  8185. const uint16_t pad_x,
  8186. const uint16_t pad_y,
  8187. const uint16_t stride_x,
  8188. const uint16_t stride_y,
  8189. const q31_t *bias,
  8190. const uint16_t pre_rshift,
  8191. const uint16_t out_scale,
  8192. const uint16_t post_rshift,
  8193. q7_t *out_tensor,
  8194. const uint16_t out_tensor_dim_x,
  8195. const uint16_t out_tensor_dim_y,
  8196. q15_t *in_tmp_buf)
  8197. {
  8198. #if defined(__zcc__)
  8199. tpt_nn_conv_1x1_sym_params S1 = {stride_x, stride_y, pad_x, pad_y, pre_rshift, out_scale, post_rshift};
  8200. tpt_nn_1x1_sym_dims S2 = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x, ker_dim_y,
  8201. out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
  8202. return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
  8203. out_tensor_ch, in_tensor, ker_weight, bias, &S1, &S2, in_tmp_buf);
  8204. #else
  8205. return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
  8206. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8207. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8208. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8209. out_tensor_dim_y, in_tmp_buf);
  8210. #endif
  8211. }
  8212. /**
  8213. * @brief This function performs 1x1 kernels convolution for signed
  8214. * 8-bit integer inputs and signed 16-bit integer outputs in
  8215. * any x and y dimensions with bias inputs and symmetric
  8216. * quantization on the outputs.
  8217. * @param[in] in_tensor pointer of the input tensor
  8218. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8219. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8220. * @param[in] in_tensor_ch number of input tensor channels
  8221. * @param[in] ker_weight pointer of kernel weights
  8222. * @param[in] out_tensor_ch number of output tensor channels
  8223. * @param[in] ker_dim_x x dimension of the filter kernel
  8224. * @param[in] ker_dim_y y dimension of the filter kernel
  8225. * @param[in] pad_x padding size in the x dimension
  8226. * @param[in] pad_y padding size in the y dimension
  8227. * @param[in] stride_x convolution stride in the x dimension
  8228. * @param[in] stride_y convolution stride in the y dimension
  8229. * @param[in] bias pointer of the bias vector
  8230. * @param[in] pre_rshift right shift amount for the output
  8231. * @param[in] out_scale value of scaling for the output
  8232. * @param[in] post_rshift right shift amount for the output
  8233. * @param[out] out_tensor pointer of the output tensor
  8234. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8235. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8236. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8237. * It is required when -mext-dsp or
  8238. * -mext-vector is enabled and its size
  8239. * must be equal to "2 * in_tensor_ch *
  8240. * ker_dim_x * ker_dim_y".
  8241. * @return This function returns 0 on success; otherwise, it returns -1
  8242. * if its inputs do not meet the constraints (see the Note
  8243. * below for details).
  8244. *
  8245. * @note
  8246. * - The input constraints of this function are:
  8247. * - in_tensor_ch is a multiple of 4
  8248. * - out_tensor_ch is a multiple of 2
  8249. * - ker_dim_x is 1
  8250. * - ker_dim_y is 1
  8251. * - pad_x is 0
  8252. * - pad_y is 0
  8253. * - stride_x is 1
  8254. * - stride_y is 1
  8255. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8256. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  8257. */
  8258. static inline int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor,
  8259. const uint16_t in_tensor_dim_x,
  8260. const uint16_t in_tensor_dim_y,
  8261. const uint16_t in_tensor_ch,
  8262. const q7_t *ker_weight,
  8263. const uint16_t out_tensor_ch,
  8264. const uint16_t ker_dim_x,
  8265. const uint16_t ker_dim_y,
  8266. const uint16_t pad_x,
  8267. const uint16_t pad_y,
  8268. const uint16_t stride_x,
  8269. const uint16_t stride_y,
  8270. const q31_t *bias,
  8271. const uint16_t pre_rshift,
  8272. const uint16_t out_scale,
  8273. const uint16_t post_rshift,
  8274. q15_t *out_tensor,
  8275. const uint16_t out_tensor_dim_x,
  8276. const uint16_t out_tensor_dim_y,
  8277. q15_t *in_tmp_buf)
  8278. {
  8279. #if defined(__zcc__)
  8280. return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
  8281. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8282. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8283. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8284. out_tensor_dim_y, in_tmp_buf);
  8285. #else
  8286. return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
  8287. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8288. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8289. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8290. out_tensor_dim_y, in_tmp_buf);
  8291. #endif
  8292. }
  8293. /**
  8294. * @brief This function performs 1x1 kernels convolution for unsigned
  8295. * 8-bit integer inputs/outputs in any x and y dimensions with
  8296. * bias inputs and symmetric quantization on the outputs.
  8297. * @param[in] in_tensor pointer of the input tensor
  8298. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8299. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8300. * @param[in] in_tensor_ch number of input tensor channels
  8301. * @param[in] ker_weight pointer of kernel weights
  8302. * @param[in] out_tensor_ch number of output tensor channels
  8303. * @param[in] ker_dim_x x dimension of the filter kernel
  8304. * @param[in] ker_dim_y y dimension of the filter kernel
  8305. * @param[in] pad_x padding size in the x dimension
  8306. * @param[in] pad_y padding size in the y dimension
  8307. * @param[in] stride_x convolution stride in the x dimension
  8308. * @param[in] stride_y convolution stride in the y dimension
  8309. * @param[in] bias pointer of the bias vector
  8310. * @param[in] pre_rshift right shift amount for the output
  8311. * @param[in] out_scale value of scaling for the output
  8312. * @param[in] post_rshift right shift amount for the output
  8313. * @param[out] out_tensor pointer of the output tensor
  8314. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8315. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8316. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8317. * It is required when -mext-dsp or
  8318. * -mext-vector is enabled and its size
  8319. * must be equal to "2 * in_tensor_ch *
  8320. * ker_dim_x * ker_dim_y".
  8321. * @return This function returns 0 on success; otherwise, it returns -1
  8322. * if its inputs do not meet the constraints (see the Note
  8323. * below for details).
  8324. *
  8325. * @note
  8326. * - The input constraints of this function are:
  8327. * - in_tensor_ch is a multiple of 4
  8328. * - out_tensor_ch is a multiple of 2
  8329. * - ker_dim_x is 1
  8330. * - ker_dim_y is 1
  8331. * - pad_x is 0
  8332. * - pad_y is 0
  8333. * - stride_x is 1
  8334. * - stride_y is 1
  8335. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8336. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  8337. */
  8338. static inline int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor,
  8339. const uint16_t in_tensor_dim_x,
  8340. const uint16_t in_tensor_dim_y,
  8341. const uint16_t in_tensor_ch,
  8342. const q7_t *ker_weight,
  8343. const uint16_t out_tensor_ch,
  8344. const uint16_t ker_dim_x,
  8345. const uint16_t ker_dim_y,
  8346. const uint16_t pad_x,
  8347. const uint16_t pad_y,
  8348. const uint16_t stride_x,
  8349. const uint16_t stride_y,
  8350. const q31_t *bias,
  8351. const uint16_t pre_rshift,
  8352. const uint16_t out_scale,
  8353. const uint16_t post_rshift,
  8354. u8_t *out_tensor,
  8355. const uint16_t out_tensor_dim_x,
  8356. const uint16_t out_tensor_dim_y,
  8357. q15_t *in_tmp_buf)
  8358. {
  8359. #if defined(__zcc__)
  8360. return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
  8361. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8362. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8363. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8364. out_tensor_dim_y, in_tmp_buf);
  8365. #else
  8366. return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
  8367. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8368. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8369. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8370. out_tensor_dim_y, in_tmp_buf);
  8371. #endif
  8372. }
  8373. /**
  8374. * @brief This function performs 1x1 kernels convolution for unsigned
  8375. * 8-bit integer inputs and signed 8-bit integer outputs in any
  8376. * x and y dimensions with bias inputs and symmetric
  8377. * quantization on the outputs.
  8378. * @param[in] in_tensor pointer of the input tensor
  8379. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8380. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8381. * @param[in] in_tensor_ch number of input tensor channels
  8382. * @param[in] ker_weight pointer of kernel weights
  8383. * @param[in] out_tensor_ch number of output tensor channels
  8384. * @param[in] ker_dim_x x dimension of the filter kernel
  8385. * @param[in] ker_dim_y y dimension of the filter kernel
  8386. * @param[in] pad_x padding size in the x dimension
  8387. * @param[in] pad_y padding size in the y dimension
  8388. * @param[in] stride_x convolution stride in the x dimension
  8389. * @param[in] stride_y convolution stride in the y dimension
  8390. * @param[in] bias pointer of the bias vector
  8391. * @param[in] pre_rshift right shift amount for the output
  8392. * @param[in] out_scale value of scaling for the output
  8393. * @param[in] post_rshift right shift amount for the output
  8394. * @param[out] out_tensor pointer of the output tensor
  8395. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8396. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8397. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8398. * It is required when -mext-dsp or
  8399. * -mext-vector is enabled and its size
  8400. * must be equal to "2 * in_tensor_ch *
  8401. * ker_dim_x * ker_dim_y".
  8402. * @return This function returns 0 on success; otherwise, it returns -1
  8403. * if its inputs do not meet the constraints (see the Note
  8404. * below for details).
  8405. *
  8406. * @note
  8407. * - The input constraints of this function are:
  8408. * - in_tensor_ch is a multiple of 4
  8409. * - out_tensor_ch is a multiple of 2
  8410. * - ker_dim_x is 1
  8411. * - ker_dim_y is 1
  8412. * - pad_x is 0
  8413. * - pad_y is 0
  8414. * - stride_x is 1
  8415. * - stride_y is 1
  8416. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8417. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  8418. */
  8419. static inline int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor,
  8420. const uint16_t in_tensor_dim_x,
  8421. const uint16_t in_tensor_dim_y,
  8422. const uint16_t in_tensor_ch,
  8423. const q7_t *ker_weight,
  8424. const uint16_t out_tensor_ch,
  8425. const uint16_t ker_dim_x,
  8426. const uint16_t ker_dim_y,
  8427. const uint16_t pad_x,
  8428. const uint16_t pad_y,
  8429. const uint16_t stride_x,
  8430. const uint16_t stride_y,
  8431. const q31_t *bias,
  8432. const uint16_t pre_rshift,
  8433. const uint16_t out_scale,
  8434. const uint16_t post_rshift,
  8435. q7_t *out_tensor,
  8436. const uint16_t out_tensor_dim_x,
  8437. const uint16_t out_tensor_dim_y,
  8438. q15_t *in_tmp_buf)
  8439. {
  8440. #if defined(__zcc__)
  8441. return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
  8442. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8443. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8444. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8445. out_tensor_dim_y, in_tmp_buf);
  8446. #else
  8447. return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
  8448. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8449. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8450. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8451. out_tensor_dim_y, in_tmp_buf);
  8452. #endif
  8453. }
  8454. /**
  8455. * @brief This function performs 1x1 kernels convolution for unsigned
  8456. * 8-bit integer inputs and signed 16-bit integer outputs in
  8457. * any x and y dimensions with bias inputs and symmetric
  8458. * quantization on the outputs.
  8459. * @param[in] in_tensor pointer of the input tensor
  8460. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8461. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8462. * @param[in] in_tensor_ch number of input tensor channels
  8463. * @param[in] ker_weight pointer of kernel weights
  8464. * @param[in] out_tensor_ch number of output tensor channels
  8465. * @param[in] ker_dim_x x dimension of the filter kernel
  8466. * @param[in] ker_dim_y y dimension of the filter kernel
  8467. * @param[in] pad_x padding size in the x dimension
  8468. * @param[in] pad_y padding size in the y dimension
  8469. * @param[in] stride_x convolution stride in the x dimension
  8470. * @param[in] stride_y convolution stride in the y dimension
  8471. * @param[in] bias pointer of the bias vector
  8472. * @param[in] pre_rshift right shift amount for the output
  8473. * @param[in] out_scale value of scaling for the output
  8474. * @param[in] post_rshift right shift amount for the output
  8475. * @param[out] out_tensor pointer of the output tensor
  8476. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8477. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8478. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8479. * It is required when -mext-dsp or
  8480. * -mext-vector is enabled and its size
  8481. * must be equal to "2 * in_tensor_ch *
  8482. * ker_dim_x * ker_dim_y".
  8483. * @return This function returns 0 on success; otherwise, it returns -1
  8484. * if its inputs do not meet the constraints (see the Note
  8485. * below for details).
  8486. *
  8487. * @note
  8488. * - The input constraints of this function are:
  8489. * - in_tensor_ch is a multiple of 4
  8490. * - out_tensor_ch is a multiple of 2
  8491. * - ker_dim_x is 1
  8492. * - ker_dim_y is 1
  8493. * - pad_x is 0
  8494. * - pad_y is 0
  8495. * - stride_x is 1
  8496. * - stride_y is 1
  8497. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8498. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  8499. */
  8500. static inline int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor,
  8501. const uint16_t in_tensor_dim_x,
  8502. const uint16_t in_tensor_dim_y,
  8503. const uint16_t in_tensor_ch,
  8504. const q7_t *ker_weight,
  8505. const uint16_t out_tensor_ch,
  8506. const uint16_t ker_dim_x,
  8507. const uint16_t ker_dim_y,
  8508. const uint16_t pad_x,
  8509. const uint16_t pad_y,
  8510. const uint16_t stride_x,
  8511. const uint16_t stride_y,
  8512. const q31_t *bias,
  8513. const uint16_t pre_rshift,
  8514. const uint16_t out_scale,
  8515. const uint16_t post_rshift,
  8516. q15_t *out_tensor,
  8517. const uint16_t out_tensor_dim_x,
  8518. const uint16_t out_tensor_dim_y,
  8519. q15_t *in_tmp_buf)
  8520. {
  8521. #if defined(__zcc__)
  8522. return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
  8523. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8524. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8525. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8526. out_tensor_dim_y, in_tmp_buf);
  8527. #else
  8528. return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
  8529. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8530. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8531. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8532. out_tensor_dim_y, in_tmp_buf);
  8533. #endif
  8534. }
  8535. /**
  8536. * @brief This function performs 1x1 kernels convolution for signed
  8537. * 8-bit integer inputs/outputs in any x and y dimensions with
  8538. * symmetric quantization on the outputs.
  8539. * @param[in] in_tensor pointer of the input tensor
  8540. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8541. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8542. * @param[in] in_tensor_ch number of input tensor channels
  8543. * @param[in] ker_weight pointer of kernel weights
  8544. * @param[in] out_tensor_ch number of output tensor channels
  8545. * @param[in] ker_dim_x x dimension of the filter kernel
  8546. * @param[in] ker_dim_y y dimension of the filter kernel
  8547. * @param[in] pad_x padding size in the x dimension
  8548. * @param[in] pad_y padding size in the y dimension
  8549. * @param[in] stride_x convolution stride in the x dimension
  8550. * @param[in] stride_y convolution stride in the y dimension
  8551. * @param[in] pre_rshift right shift amount for the output
  8552. * @param[in] out_scale value of scaling for the output
  8553. * @param[in] post_rshift right shift amount for the output
  8554. * @param[out] out_tensor pointer of the output tensor
  8555. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8556. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8557. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8558. * It is required when -mext-dsp or
  8559. * -mext-vector is enabled and its size
  8560. * must be equal to 2 * in_tensor_ch *
  8561. * ker_dim_x * ker_dim_y.
  8562. * @return This function returns 0 on success; otherwise, it returns -1
  8563. * if its inputs do not meet the constraints (see the Note
  8564. * below for details).
  8565. *
  8566. * @note
  8567. * - The input constraints of this function are:
  8568. * - in_tensor_ch is a multiple of 4
  8569. * - out_tensor_ch is a multiple of 2
  8570. * - ker_dim_x is 1
  8571. * - ker_dim_y is 1
  8572. * - pad_x is 0
  8573. * - pad_y is 0
  8574. * - stride_x is 1
  8575. * - stride_y is 1
  8576. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8577. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  8578. */
  8579. static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor,
  8580. const uint16_t in_tensor_dim_x,
  8581. const uint16_t in_tensor_dim_y,
  8582. const uint16_t in_tensor_ch,
  8583. const q7_t *ker_weight,
  8584. const uint16_t out_tensor_ch,
  8585. const uint16_t ker_dim_x,
  8586. const uint16_t ker_dim_y,
  8587. const uint16_t pad_x,
  8588. const uint16_t pad_y,
  8589. const uint16_t stride_x,
  8590. const uint16_t stride_y,
  8591. const uint16_t pre_rshift,
  8592. const uint16_t out_scale,
  8593. const uint16_t post_rshift,
  8594. q7_t *out_tensor,
  8595. const uint16_t out_tensor_dim_x,
  8596. const uint16_t out_tensor_dim_y,
  8597. q15_t *in_tmp_buf)
  8598. {
  8599. #if defined(__zcc__)
  8600. return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
  8601. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8602. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8603. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8604. out_tensor_dim_y, in_tmp_buf);
  8605. #else
  8606. return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
  8607. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8608. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8609. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8610. out_tensor_dim_y, in_tmp_buf);
  8611. #endif
  8612. }
  8613. /**
  8614. * @brief This function performs 1x1 kernels convolution for signed
  8615. * 8-bit integer inputs and signed 16-bit integer outputs in
  8616. * any x and y dimensions with symmetric quantization on the
  8617. * outputs.
  8618. * @param[in] in_tensor pointer of the input tensor
  8619. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8620. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8621. * @param[in] in_tensor_ch number of input tensor channels
  8622. * @param[in] ker_weight pointer of kernel weights
  8623. * @param[in] out_tensor_ch number of output tensor channels
  8624. * @param[in] ker_dim_x x dimension of the filter kernel
  8625. * @param[in] ker_dim_y y dimension of the filter kernel
  8626. * @param[in] pad_x padding size in the x dimension
  8627. * @param[in] pad_y padding size in the y dimension
  8628. * @param[in] stride_x convolution stride in the x dimension
  8629. * @param[in] stride_y convolution stride in the y dimension
  8630. * @param[in] pre_rshift right shift amount for the output
  8631. * @param[in] out_scale value of scaling for the output
  8632. * @param[in] post_rshift right shift amount for the output
  8633. * @param[out] out_tensor pointer of the output tensor
  8634. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8635. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8636. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8637. * It is required when -mext-dsp or
  8638. * -mext-vector is enabled and its size
  8639. * must be equal to 2 * in_tensor_ch *
  8640. * ker_dim_x * ker_dim_y.
  8641. * @return This function returns 0 on success; otherwise, it returns -1
  8642. * if its inputs do not meet the constraints (see the Note
  8643. * below for details).
  8644. *
  8645. * @note
  8646. * - The input constraints of this function are:
  8647. * - in_tensor_ch is a multiple of 4
  8648. * - out_tensor_ch is a multiple of 2
  8649. * - ker_dim_x is 1
  8650. * - ker_dim_y is 1
  8651. * - pad_x is 0
  8652. * - pad_y is 0
  8653. * - stride_x is 1
  8654. * - stride_y is 1
  8655. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8656. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  8657. */
  8658. static inline int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor,
  8659. const uint16_t in_tensor_dim_x,
  8660. const uint16_t in_tensor_dim_y,
  8661. const uint16_t in_tensor_ch,
  8662. const q7_t *ker_weight,
  8663. const uint16_t out_tensor_ch,
  8664. const uint16_t ker_dim_x,
  8665. const uint16_t ker_dim_y,
  8666. const uint16_t pad_x,
  8667. const uint16_t pad_y,
  8668. const uint16_t stride_x,
  8669. const uint16_t stride_y,
  8670. const uint16_t pre_rshift,
  8671. const uint16_t out_scale,
  8672. const uint16_t post_rshift,
  8673. q15_t *out_tensor,
  8674. const uint16_t out_tensor_dim_x,
  8675. const uint16_t out_tensor_dim_y,
  8676. q15_t *in_tmp_buf)
  8677. {
  8678. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  8679. return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
  8680. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8681. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8682. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8683. out_tensor_dim_y, in_tmp_buf);
  8684. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  8685. return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
  8686. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8687. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8688. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8689. out_tensor_dim_y, in_tmp_buf);
  8690. #endif /* defined(__zcc__) */
  8691. }
  8692. /**
  8693. * @brief This function performs 1x1 kernel convolution for unsigned
  8694. * 8-bit integer inputs/outputs in any x and y dimensions with
  8695. * symmetric quantization on the outputs.
  8696. * @param[in] in_tensor pointer of the input tensor
  8697. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8698. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8699. * @param[in] in_tensor_ch number of input tensor channels
  8700. * @param[in] ker_weight pointer of kernel weights
  8701. * @param[in] out_tensor_ch number of output tensor channels
  8702. * @param[in] ker_dim_x x dimension of the filter kernel
  8703. * @param[in] ker_dim_y y dimension of the filter kernel
  8704. * @param[in] pad_x padding size in the x dimension
  8705. * @param[in] pad_y padding size in the y dimension
  8706. * @param[in] stride_x convolution stride in the x dimension
  8707. * @param[in] stride_y convolution stride in the y dimension
  8708. * @param[in] pre_rshift right shift amount applied to the output before scaling
  8709. * @param[in] out_scale value of scaling for the output
  8710. * @param[in] post_rshift right shift amount applied to the output after scaling
  8711. * @param[out] out_tensor pointer of the output tensor
  8712. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8713. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8714. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8715. * It is required when -mext-dsp or
  8716. * -mext-vector is enabled and its size
  8717. * must be equal to "2 * in_tensor_ch *
  8718. * ker_dim_x * ker_dim_y".
  8719. * @return This function returns 0 on success; otherwise, it returns -1
  8720. * if its inputs do not meet the constraints (see the Note
  8721. * below for details).
  8722. *
  8723. * @note
  8724. * - The input constraints of this function are:
  8725. * - in_tensor_ch is a multiple of 4
  8726. * - out_tensor_ch is a multiple of 2
  8727. * - ker_dim_x is 1
  8728. * - ker_dim_y is 1
  8729. * - pad_x is 0
  8730. * - pad_y is 0
  8731. * - stride_x is 1
  8732. * - stride_y is 1
  8733. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8734. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  8735. */
  8736. static inline int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor,
  8737. const uint16_t in_tensor_dim_x,
  8738. const uint16_t in_tensor_dim_y,
  8739. const uint16_t in_tensor_ch,
  8740. const q7_t *ker_weight,
  8741. const uint16_t out_tensor_ch,
  8742. const uint16_t ker_dim_x,
  8743. const uint16_t ker_dim_y,
  8744. const uint16_t pad_x,
  8745. const uint16_t pad_y,
  8746. const uint16_t stride_x,
  8747. const uint16_t stride_y,
  8748. const uint16_t pre_rshift,
  8749. const uint16_t out_scale,
  8750. const uint16_t post_rshift,
  8751. u8_t *out_tensor,
  8752. const uint16_t out_tensor_dim_x,
  8753. const uint16_t out_tensor_dim_y,
  8754. q15_t *in_tmp_buf)
  8755. {
  8756. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  8757. return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
  8758. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8759. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8760. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8761. out_tensor_dim_y, in_tmp_buf);
  8762. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  8763. return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
  8764. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8765. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8766. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8767. out_tensor_dim_y, in_tmp_buf);
  8768. #endif /* defined(__zcc__) */
  8769. }
  8770. /**
  8771. * @brief This function performs 1x1 kernel convolution for unsigned
  8772. * 8-bit integer inputs and signed 8-bit integer outputs in any
  8773. * x and y dimensions with symmetric quantization on the
  8774. * outputs.
  8775. * @param[in] in_tensor pointer of the input tensor
  8776. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8777. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8778. * @param[in] in_tensor_ch number of input tensor channels
  8779. * @param[in] ker_weight pointer of kernel weights
  8780. * @param[in] out_tensor_ch number of output tensor channels
  8781. * @param[in] ker_dim_x x dimension of the filter kernel
  8782. * @param[in] ker_dim_y y dimension of the filter kernel
  8783. * @param[in] pad_x padding size in the x dimension
  8784. * @param[in] pad_y padding size in the y dimension
  8785. * @param[in] stride_x convolution stride in the x dimension
  8786. * @param[in] stride_y convolution stride in the y dimension
  8787. * @param[in] pre_rshift right shift amount applied to the output before scaling
  8788. * @param[in] out_scale value of scaling for the output
  8789. * @param[in] post_rshift right shift amount applied to the output after scaling
  8790. * @param[out] out_tensor pointer of the output tensor
  8791. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8792. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8793. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8794. * It is required when -mext-dsp or
  8795. * -mext-vector is enabled and its size
  8796. * must be equal to "2 * in_tensor_ch *
  8797. * ker_dim_x * ker_dim_y".
  8798. * @return This function returns 0 on success; otherwise, it returns -1
  8799. * if its inputs do not meet the constraints (see the Note
  8800. * below for details).
  8801. *
  8802. * @note
  8803. * - The input constraints of this function are:
  8804. * - in_tensor_ch is a multiple of 4
  8805. * - out_tensor_ch is a multiple of 2
  8806. * - ker_dim_x is 1
  8807. * - ker_dim_y is 1
  8808. * - pad_x is 0
  8809. * - pad_y is 0
  8810. * - stride_x is 1
  8811. * - stride_y is 1
  8812. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8813. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  8814. */
  8815. static inline int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor,
  8816. const uint16_t in_tensor_dim_x,
  8817. const uint16_t in_tensor_dim_y,
  8818. const uint16_t in_tensor_ch,
  8819. const q7_t *ker_weight,
  8820. const uint16_t out_tensor_ch,
  8821. const uint16_t ker_dim_x,
  8822. const uint16_t ker_dim_y,
  8823. const uint16_t pad_x,
  8824. const uint16_t pad_y,
  8825. const uint16_t stride_x,
  8826. const uint16_t stride_y,
  8827. const uint16_t pre_rshift,
  8828. const uint16_t out_scale,
  8829. const uint16_t post_rshift,
  8830. q7_t *out_tensor,
  8831. const uint16_t out_tensor_dim_x,
  8832. const uint16_t out_tensor_dim_y,
  8833. q15_t *in_tmp_buf)
  8834. {
  8835. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  8836. return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
  8837. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8838. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8839. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8840. out_tensor_dim_y, in_tmp_buf);
  8841. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  8842. return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
  8843. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8844. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8845. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8846. out_tensor_dim_y, in_tmp_buf);
  8847. #endif /* defined(__zcc__) */
  8848. }
  8849. /**
  8850. * @brief This function performs 1x1 kernel convolution for unsigned
  8851. * 8-bit integer inputs and signed 16-bit integer outputs in
  8852. * any x and y dimensions with symmetric quantization on the
  8853. * outputs.
  8854. * @param[in] in_tensor pointer of the input tensor
  8855. * @param[in] in_tensor_dim_x x dimension of the input tensor
  8856. * @param[in] in_tensor_dim_y y dimension of the input tensor
  8857. * @param[in] in_tensor_ch number of input tensor channels
  8858. * @param[in] ker_weight pointer of kernel weights
  8859. * @param[in] out_tensor_ch number of output tensor channels
  8860. * @param[in] ker_dim_x x dimension of the filter kernel
  8861. * @param[in] ker_dim_y y dimension of the filter kernel
  8862. * @param[in] pad_x padding size in the x dimension
  8863. * @param[in] pad_y padding size in the y dimension
  8864. * @param[in] stride_x convolution stride in the x dimension
  8865. * @param[in] stride_y convolution stride in the y dimension
  8866. * @param[in] pre_rshift right shift amount applied to the output before scaling
  8867. * @param[in] out_scale value of scaling for the output
  8868. * @param[in] post_rshift right shift amount applied to the output after scaling
  8869. * @param[out] out_tensor pointer of the output tensor
  8870. * @param[in] out_tensor_dim_x x dimension of the output tensor
  8871. * @param[in] out_tensor_dim_y y dimension of the output tensor
  8872. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  8873. * It is required when -mext-dsp or
  8874. * -mext-vector is enabled and its size
  8875. * must be equal to "2 * in_tensor_ch *
  8876. * ker_dim_x * ker_dim_y".
  8877. * @return This function returns 0 on success; otherwise, it returns -1
  8878. * if its inputs do not meet the constraints (see the Note
  8879. * below for details).
  8880. *
  8881. * @note
  8882. * - The input constraints of this function are:
  8883. * - in_tensor_ch is a multiple of 4
  8884. * - out_tensor_ch is a multiple of 2
  8885. * - ker_dim_x is 1
  8886. * - ker_dim_y is 1
  8887. * - pad_x is 0
  8888. * - pad_y is 0
  8889. * - stride_x is 1
  8890. * - stride_y is 1
  8891. * - The outputs will be 2-stage shifted before being stored, i.e.,
  8892. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  8893. */
  8894. static inline int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor,
  8895. const uint16_t in_tensor_dim_x,
  8896. const uint16_t in_tensor_dim_y,
  8897. const uint16_t in_tensor_ch,
  8898. const q7_t *ker_weight,
  8899. const uint16_t out_tensor_ch,
  8900. const uint16_t ker_dim_x,
  8901. const uint16_t ker_dim_y,
  8902. const uint16_t pad_x,
  8903. const uint16_t pad_y,
  8904. const uint16_t stride_x,
  8905. const uint16_t stride_y,
  8906. const uint16_t pre_rshift,
  8907. const uint16_t out_scale,
  8908. const uint16_t post_rshift,
  8909. q15_t *out_tensor,
  8910. const uint16_t out_tensor_dim_x,
  8911. const uint16_t out_tensor_dim_y,
  8912. q15_t *in_tmp_buf)
  8913. {
  8914. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  8915. return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
  8916. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8917. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8918. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8919. out_tensor_dim_y, in_tmp_buf);
  8920. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  8921. return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
  8922. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  8923. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  8924. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  8925. out_tensor_dim_y, in_tmp_buf);
  8926. #endif /* defined(__zcc__) */
  8927. }
  8928. /**
  8929. * @brief This function performs fast convolution on RGB images for
  8930. * signed 8-bit integer inputs/outputs with bias inputs and
  8931. * symmetric quantization on the outputs.
  8932. * @param[in] in_tensor pointer of the input tensor
  8933. * @param[in] in_tensor_dim input tensor dimension
  8934. * @param[in] ker_weight pointer of kernel weights
  8935. * @param[in] out_tensor_ch number of output tensor channels
  8936. * @param[in] ker_dim dimension of the filter kernel
  8937. * @param[in] pad padding size
  8938. * @param[in] stride convolution stride
  8939. * @param[in] bias pointer of the bias vector
  8940. * @param[in] pre_rshift right shift amount applied to the output before scaling
  8941. * @param[in] out_scale value of scaling for the output
  8942. * @param[in] post_rshift right shift amount applied to the output after scaling
  8943. * @param[out] out_tensor pointer of the output tensor
  8944. * @param[in] out_tensor_dim dimension of the output tensor
  8945. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  8946. * required when -mext-dsp or -mext-vector
  8947. * is enabled and its size must be "2 * (3 *
  8948. * ker_dim * ker_dim + 1)".
  8949. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  8950. * required when -mext-dsp or -mext-vector
  8951. * is enabled and its size must be "out_tensor_ch *
  8952. * (3 * ker_dim * ker_dim + 1)".
  8953. * @return This function only returns 0.
  8954. *
  8955. * @note
  8956. * The outputs will be 2-stage shifted before being stored, i.e.,
  8957. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  8958. */
  8959. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(const q7_t *in_tensor,
  8960. const uint16_t in_tensor_dim,
  8961. const q7_t *ker_weight,
  8962. const uint16_t out_tensor_ch,
  8963. const uint16_t ker_dim,
  8964. const uint16_t pad,
  8965. const uint16_t stride,
  8966. const q31_t *bias,
  8967. const uint16_t pre_rshift,
  8968. const uint16_t out_scale,
  8969. const uint16_t post_rshift,
  8970. q7_t *out_tensor,
  8971. const uint16_t out_tensor_dim,
  8972. q15_t *in_tmp_buf,
  8973. q15_t *wt_tmp_buf)
  8974. {
  8975. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  8976. return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
  8977. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  8978. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  8979. in_tmp_buf, wt_tmp_buf);
  8980. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  8981. return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
  8982. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  8983. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  8984. in_tmp_buf, wt_tmp_buf);
  8985. #endif /* defined(__zcc__) */
  8986. }
  8987. /**
  8988. * @brief This function performs fast convolution on RGB images for
  8989. * signed 8-bit integer inputs and signed 16-bit integer
  8990. * outputs with bias inputs and symmetric quantization on the
  8991. * outputs.
  8992. * @param[in] in_tensor pointer of the input tensor
  8993. * @param[in] in_tensor_dim input tensor dimension
  8994. * @param[in] ker_weight pointer of kernel weights
  8995. * @param[in] out_tensor_ch number of output tensor channels
  8996. * @param[in] ker_dim dimension of the filter kernel
  8997. * @param[in] pad padding size
  8998. * @param[in] stride convolution stride
  8999. * @param[in] bias pointer of the bias vector
  9000. * @param[in] pre_rshift right shift amount applied to the output before scaling
  9001. * @param[in] out_scale value of scaling for the output
  9002. * @param[in] post_rshift right shift amount applied to the output after scaling
  9003. * @param[out] out_tensor pointer of the output tensor
  9004. * @param[in] out_tensor_dim dimension of the output tensor
  9005. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9006. * required when -mext-dsp or -mext-vector
  9007. * is enabled and its size must be "2 * (3 *
  9008. * ker_dim * ker_dim + 1)".
  9009. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9010. * required when -mext-dsp or -mext-vector
  9011. * is enabled and its size must be "out_tensor_ch *
  9012. * (3 * ker_dim * ker_dim + 1)".
  9013. * @return This function only returns 0.
  9014. *
  9015. * @note
  9016. * The outputs will be 2-stage shifted before being stored, i.e.,
  9017. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  9018. */
  9019. static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(const q7_t *in_tensor,
  9020. const uint16_t in_tensor_dim,
  9021. const q7_t *ker_weight,
  9022. const uint16_t out_tensor_ch,
  9023. const uint16_t ker_dim,
  9024. const uint16_t pad,
  9025. const uint16_t stride,
  9026. const q31_t *bias,
  9027. const uint16_t pre_rshift,
  9028. const uint16_t out_scale,
  9029. const uint16_t post_rshift,
  9030. q15_t *out_tensor,
  9031. const uint16_t out_tensor_dim,
  9032. q15_t *in_tmp_buf,
  9033. q15_t *wt_tmp_buf)
  9034. {
  9035. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  9036. return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
  9037. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9038. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9039. in_tmp_buf, wt_tmp_buf);
  9040. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  9041. return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
  9042. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9043. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9044. in_tmp_buf, wt_tmp_buf);
  9045. #endif /* defined(__zcc__) */
  9046. }
  9047. /**
  9048. * @brief This function performs fast convolution on RGB images for
  9049. * unsigned 8-bit integer inputs/outputs with bias inputs and
  9050. * symmetric quantization on the outputs.
  9051. * @param[in] in_tensor pointer of the input tensor
  9052. * @param[in] in_tensor_dim input tensor dimension
  9053. * @param[in] ker_weight pointer of kernel weights
  9054. * @param[in] out_tensor_ch number of output tensor channels
  9055. * @param[in] ker_dim dimension of the filter kernel
  9056. * @param[in] pad padding size
  9057. * @param[in] stride convolution stride
  9058. * @param[in] bias pointer of the bias vector
  9059. * @param[in] pre_rshift right shift amount applied to the output before scaling
  9060. * @param[in] out_scale value of scaling for the output
  9061. * @param[in] post_rshift right shift amount applied to the output after scaling
  9062. * @param[out] out_tensor pointer of the output tensor
  9063. * @param[in] out_tensor_dim dimension of the output tensor
  9064. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9065. * required when -mext-dsp or -mext-vector
  9066. * is enabled and its size must be "2 * (3 *
  9067. * ker_dim * ker_dim + 1)".
  9068. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9069. * required when -mext-dsp or -mext-vector
  9070. * is enabled and its size must be "out_tensor_ch *
  9071. * (3 * ker_dim * ker_dim + 1)".
  9072. * @return This function only returns 0.
  9073. *
  9074. * @note
  9075. * The outputs will be 2-stage shifted before being stored, i.e.,
  9076. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  9077. */
  9078. static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
  9079. const uint16_t in_tensor_dim,
  9080. const q7_t *ker_weight,
  9081. const uint16_t out_tensor_ch,
  9082. const uint16_t ker_dim,
  9083. const uint16_t pad,
  9084. const uint16_t stride,
  9085. const q31_t *bias,
  9086. const uint16_t pre_rshift,
  9087. const uint16_t out_scale,
  9088. const uint16_t post_rshift,
  9089. u8_t *out_tensor,
  9090. const uint16_t out_tensor_dim,
  9091. q15_t *in_tmp_buf,
  9092. q15_t *wt_tmp_buf)
  9093. {
  9094. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  9095. return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
  9096. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9097. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9098. in_tmp_buf, wt_tmp_buf);
  9099. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  9100. return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
  9101. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9102. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9103. in_tmp_buf, wt_tmp_buf);
  9104. #endif /* defined(__zcc__) */
  9105. }
  9106. /**
  9107. * @brief This function performs fast convolution on RGB images for
  9108. * unsigned 8-bit integer inputs and signed 8-bit integer outputs
  9109. * with bias inputs and symmetric quantization on the outputs.
  9110. * @param[in] in_tensor pointer of the input tensor
  9111. * @param[in] in_tensor_dim input tensor dimension
  9112. * @param[in] ker_weight pointer of kernel weights
  9113. * @param[in] out_tensor_ch number of output tensor channels
  9114. * @param[in] ker_dim dimension of the filter kernel
  9115. * @param[in] pad padding size
  9116. * @param[in] stride convolution stride
  9117. * @param[in] bias pointer of the bias vector
  9118. * @param[in] pre_rshift right shift amount applied to the output before scaling
  9119. * @param[in] out_scale value of scaling for the output
  9120. * @param[in] post_rshift right shift amount applied to the output after scaling
  9121. * @param[out] out_tensor pointer of the output tensor
  9122. * @param[in] out_tensor_dim dimension of the output tensor
  9123. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9124. * required when -mext-dsp or -mext-vector
  9125. * is enabled and its size must be "2 * (3 *
  9126. * ker_dim * ker_dim + 1)".
  9127. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9128. * required when -mext-dsp or -mext-vector
  9129. * is enabled and its size must be "out_tensor_ch *
  9130. * (3 * ker_dim * ker_dim + 1)".
  9131. * @return This function only returns 0.
  9132. *
  9133. * @note
  9134. * The outputs will be 2-stage shifted before being stored, i.e.,
  9135. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  9136. */
  9137. static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
  9138. const uint16_t in_tensor_dim,
  9139. const q7_t *ker_weight,
  9140. const uint16_t out_tensor_ch,
  9141. const uint16_t ker_dim,
  9142. const uint16_t pad,
  9143. const uint16_t stride,
  9144. const q31_t *bias,
  9145. const uint16_t pre_rshift,
  9146. const uint16_t out_scale,
  9147. const uint16_t post_rshift,
  9148. q7_t *out_tensor,
  9149. const uint16_t out_tensor_dim,
  9150. q15_t *in_tmp_buf,
  9151. q15_t *wt_tmp_buf)
  9152. {
  9153. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  9154. return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
  9155. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9156. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9157. in_tmp_buf, wt_tmp_buf);
  9158. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  9159. return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
  9160. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9161. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9162. in_tmp_buf, wt_tmp_buf);
  9163. #endif /* defined(__zcc__) */
  9164. }
  9165. /**
  9166. * @brief This function performs fast convolution on RGB images for
  9167. * unsigned 8-bit integer inputs and signed 16-bit integer
  9168. * outputs with bias inputs and symmetric quantization on the
  9169. * outputs.
  9170. * @param[in] in_tensor pointer of the input tensor
  9171. * @param[in] in_tensor_dim input tensor dimension
  9172. * @param[in] ker_weight pointer of kernel weights
  9173. * @param[in] out_tensor_ch number of output tensor channels
  9174. * @param[in] ker_dim dimension of the filter kernel
  9175. * @param[in] pad padding size
  9176. * @param[in] stride convolution stride
  9177. * @param[in] bias pointer of the bias vector
  9178. * @param[in] pre_rshift right shift amount applied to the output before scaling
  9179. * @param[in] out_scale value of scaling for the output
  9180. * @param[in] post_rshift right shift amount applied to the output after scaling
  9181. * @param[out] out_tensor pointer of the output tensor
  9182. * @param[in] out_tensor_dim dimension of the output tensor
  9183. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9184. * required when -mext-dsp or -mext-vector
  9185. * is enabled and its size must be "2 * (3 *
  9186. * ker_dim * ker_dim + 1)".
  9187. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9188. * required when -mext-dsp or -mext-vector
  9189. * is enabled and its size must be "out_tensor_ch *
  9190. * (3 * ker_dim * ker_dim + 1)".
  9191. * @return This function only returns 0.
  9192. *
  9193. * @note
  9194. * The outputs will be 2-stage shifted before being stored, i.e.,
  9195. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  9196. */
  9197. static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
  9198. const uint16_t in_tensor_dim,
  9199. const q7_t *ker_weight,
  9200. const uint16_t out_tensor_ch,
  9201. const uint16_t ker_dim,
  9202. const uint16_t pad,
  9203. const uint16_t stride,
  9204. const q31_t *bias,
  9205. const uint16_t pre_rshift,
  9206. const uint16_t out_scale,
  9207. const uint16_t post_rshift,
  9208. q15_t *out_tensor,
  9209. const uint16_t out_tensor_dim,
  9210. q15_t *in_tmp_buf,
  9211. q15_t *wt_tmp_buf)
  9212. {
  9213. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  9214. return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
  9215. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9216. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9217. in_tmp_buf, wt_tmp_buf);
  9218. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  9219. return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
  9220. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9221. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9222. in_tmp_buf, wt_tmp_buf);
  9223. #endif /* defined(__zcc__) */
  9224. }
  9225. /**
  9226. * @brief This function performs fast convolution on RGB images for
  9227. * signed 8-bit integer inputs/outputs with symmetric
  9228. * quantization on the outputs.
  9229. * @param[in] in_tensor pointer of the input tensor
  9230. * @param[in] in_tensor_dim input tensor dimension
  9231. * @param[in] ker_weight pointer of kernel weights
  9232. * @param[in] out_tensor_ch number of output tensor channels
  9233. * @param[in] ker_dim dimension of the filter kernel
  9234. * @param[in] pad padding size
  9235. * @param[in] stride convolution stride
  9236. * @param[in] pre_rshift right shift amount applied to the output before scaling
  9237. * @param[in] out_scale value of scaling for the output
  9238. * @param[in] post_rshift right shift amount applied to the output after scaling
  9239. * @param[out] out_tensor pointer of the output tensor
  9240. * @param[in] out_tensor_dim dimension of the output tensor
  9241. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9242. * required when -mext-dsp or -mext-vector
  9243. * is enabled and its size must be "2 * (3 *
  9244. * ker_dim * ker_dim + 1)".
  9245. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9246. * required when -mext-dsp or -mext-vector
  9247. * is enabled and its size must be "out_tensor_ch *
  9248. * (3 * ker_dim * ker_dim + 1)".
  9249. * @return This function only returns 0.
  9250. *
  9251. * @note
  9252. * The outputs will be 2-stage shifted before being stored, i.e.,
  9253. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  9254. */
  9255. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(const q7_t *in_tensor,
  9256. const uint16_t in_tensor_dim,
  9257. const q7_t *ker_weight,
  9258. const uint16_t out_tensor_ch,
  9259. const uint16_t ker_dim,
  9260. const uint16_t pad,
  9261. const uint16_t stride,
  9262. const uint16_t pre_rshift,
  9263. const uint16_t out_scale,
  9264. const uint16_t post_rshift,
  9265. q7_t *out_tensor,
  9266. const uint16_t out_tensor_dim,
  9267. q15_t *in_tmp_buf,
  9268. q15_t *wt_tmp_buf)
  9269. {
  9270. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  9271. return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
  9272. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9273. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9274. in_tmp_buf, wt_tmp_buf);
  9275. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  9276. return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
  9277. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9278. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9279. in_tmp_buf, wt_tmp_buf);
  9280. #endif /* defined(__zcc__) */
  9281. }
  9282. /**
  9283. * @brief This function performs fast convolution on RGB images for
  9284. * signed 8-bit integer inputs and signed 16-bit integer
  9285. * outputs with symmetric quantization on the outputs.
  9286. * @param[in] in_tensor pointer of the input tensor
  9287. * @param[in] in_tensor_dim input tensor dimension
  9288. * @param[in] ker_weight pointer of kernel weights
  9289. * @param[in] out_tensor_ch number of output tensor channels
  9290. * @param[in] ker_dim dimension of the filter kernel
  9291. * @param[in] pad padding size
  9292. * @param[in] stride convolution stride
  9293. * @param[in] pre_rshift right shift amount applied to the output before scaling
  9294. * @param[in] out_scale value of scaling for the output
  9295. * @param[in] post_rshift right shift amount applied to the output after scaling
  9296. * @param[out] out_tensor pointer of the output tensor
  9297. * @param[in] out_tensor_dim dimension of the output tensor
  9298. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9299. * required when -mext-dsp or -mext-vector
  9300. * is enabled and its size must be "2 * (3 *
  9301. * ker_dim * ker_dim + 1)".
  9302. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9303. * required when -mext-dsp or -mext-vector
  9304. * is enabled and its size must be "out_tensor_ch *
  9305. * (3 * ker_dim * ker_dim + 1)".
  9306. * @return This function only returns 0.
  9307. *
  9308. * @note
  9309. * The outputs will be 2-stage shifted before being stored, i.e.,
  9310. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  9311. */
  9312. static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(const q7_t *in_tensor,
  9313. const uint16_t in_tensor_dim,
  9314. const q7_t *ker_weight,
  9315. const uint16_t out_tensor_ch,
  9316. const uint16_t ker_dim,
  9317. const uint16_t pad,
  9318. const uint16_t stride,
  9319. const uint16_t pre_rshift,
  9320. const uint16_t out_scale,
  9321. const uint16_t post_rshift,
  9322. q15_t *out_tensor,
  9323. const uint16_t out_tensor_dim,
  9324. q15_t *in_tmp_buf,
  9325. q15_t *wt_tmp_buf)
  9326. {
  9327. #if defined(__zcc__) /* __zcc__ defined: forward all arguments unchanged to the tpt_nn_ routine */
  9328. return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
  9329. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9330. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9331. in_tmp_buf, wt_tmp_buf);
  9332. #else /* __zcc__ not defined: forward all arguments unchanged to the riscv_nn_ routine */
  9333. return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
  9334. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9335. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9336. in_tmp_buf, wt_tmp_buf);
  9337. #endif /* defined(__zcc__) */
  9338. }
  9339. /**
  9340. * @brief This function performs fast convolution on RGB images for
  9341. * unsigned 8-bit integer inputs/outputs with symmetric
  9342. * quantization on the outputs.
  9343. * @param[in] in_tensor pointer of the input tensor
  9344. * @param[in] in_tensor_dim input tensor dimension
  9345. * @param[in] ker_weight pointer of kernel weights
  9346. * @param[in] out_tensor_ch number of output tensor channels
  9347. * @param[in] ker_dim dimension of the filter kernel
  9348. * @param[in] pad padding size
  9349. * @param[in] stride convolution stride
  9350. * @param[in] pre_rshift right shift amount for the output
  9351. * @param[in] out_scale value of scaling for the output
  9352. * @param[in] post_rshift right shift amount for the output
  9353. * @param[out] out_tensor pointer of the output tensor
  9354. * @param[in] out_tensor_dim dimension of the output tensor
  9355. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9356. * required when -mext-dsp or -mext-vector
  9357. * enabled and its size must be "2 * (3 *
  9358. * ker_dim * ker_dim + 1)".
  9359. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9360. * required when -mext-dsp or -mext-vector
  9361. * enabled and its size must be "out_tensor_ch *
  9362. * (3 * ker_dim * ker_dim + 1)".
  9363. * @return This function only returns 0.
  9364. *
  9365. * @note
  9366. * The outputs will be 2-stage shifted before being stored, i.e.,
  9367. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9368. */
  9369. static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(const u8_t *in_tensor,
  9370. const uint16_t in_tensor_dim,
  9371. const q7_t *ker_weight,
  9372. const uint16_t out_tensor_ch,
  9373. const uint16_t ker_dim,
  9374. const uint16_t pad,
  9375. const uint16_t stride,
  9376. const uint16_t pre_rshift,
  9377. const uint16_t out_scale,
  9378. const uint16_t post_rshift,
  9379. u8_t *out_tensor,
  9380. const uint16_t out_tensor_dim,
  9381. q15_t *in_tmp_buf,
  9382. q15_t *wt_tmp_buf)
  9383. {
  9384. #if defined(__zcc__)
  9385. return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
  9386. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9387. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9388. in_tmp_buf, wt_tmp_buf);
  9389. #else
  9390. return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
  9391. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9392. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9393. in_tmp_buf, wt_tmp_buf);
  9394. #endif
  9395. }
  9396. /**
  9397. * @brief This function performs fast convolution on RGB images for
  9398. * unsigned 8-bit integer inputs and signed 8-bit integer
  9399. * outputs with symmetric quantization on the outputs.
  9400. * @param[in] in_tensor pointer of the input tensor
  9401. * @param[in] in_tensor_dim input tensor dimension
  9402. * @param[in] ker_weight pointer of kernel weights
  9403. * @param[in] out_tensor_ch number of output tensor channels
  9404. * @param[in] ker_dim dimension of the filter kernel
  9405. * @param[in] pad padding size
  9406. * @param[in] stride convolution stride
  9407. * @param[in] pre_rshift right shift amount for the output
  9408. * @param[in] out_scale value of scaling for the output
  9409. * @param[in] post_rshift right shift amount for the output
  9410. * @param[out] out_tensor pointer of the output tensor
  9411. * @param[in] out_tensor_dim dimension of the output tensor
  9412. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9413. * required when -mext-dsp or -mext-vector
  9414. * enabled and its size must be "2 * (3 *
  9415. * ker_dim * ker_dim + 1)".
  9416. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9417. * required when -mext-dsp or -mext-vector
  9418. * enabled and its size must be "out_tensor_ch *
  9419. * (3 * ker_dim * ker_dim + 1)".
  9420. * @return This function only returns 0.
  9421. *
  9422. * @note
  9423. * The outputs will be 2-stage shifted before being stored, i.e.,
  9424. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9425. */
  9426. static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(const u8_t *in_tensor,
  9427. const uint16_t in_tensor_dim,
  9428. const q7_t *ker_weight,
  9429. const uint16_t out_tensor_ch,
  9430. const uint16_t ker_dim,
  9431. const uint16_t pad,
  9432. const uint16_t stride,
  9433. const uint16_t pre_rshift,
  9434. const uint16_t out_scale,
  9435. const uint16_t post_rshift,
  9436. q7_t *out_tensor,
  9437. const uint16_t out_tensor_dim,
  9438. q15_t *in_tmp_buf,
  9439. q15_t *wt_tmp_buf)
  9440. {
  9441. #if defined(__zcc__)
  9442. return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
  9443. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9444. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9445. in_tmp_buf, wt_tmp_buf);
  9446. #else
  9447. return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
  9448. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9449. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9450. in_tmp_buf, wt_tmp_buf);
  9451. #endif
  9452. }
  9453. /**
  9454. * @brief This function performs fast convolution on RGB images for
  9455. * unsigned 8-bit integer inputs and signed 16-bit integer
  9456. * outputs with symmetric quantization on the outputs.
  9457. * @param[in] in_tensor pointer of the input tensor
  9458. * @param[in] in_tensor_dim input tensor dimension
  9459. * @param[in] ker_weight pointer of kernel weights
  9460. * @param[in] out_tensor_ch number of output tensor channels
  9461. * @param[in] ker_dim dimension of the filter kernel
  9462. * @param[in] pad padding size
  9463. * @param[in] stride convolution stride
  9464. * @param[in] pre_rshift right shift amount for the output
  9465. * @param[in] out_scale value of scaling for the output
  9466. * @param[in] post_rshift right shift amount for the output
  9467. * @param[out] out_tensor pointer of the output tensor
  9468. * @param[in] out_tensor_dim dimension of the output tensor
  9469. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9470. * required when -mext-dsp or -mext-vector
  9471. * enabled and its size must be "2 * (3 *
  9472. * ker_dim * ker_dim + 1)".
  9473. * @param[in] wt_tmp_buf temporary buffer for kernel weights. It is
  9474. * required when -mext-dsp or -mext-vector
  9475. * enabled and its size must be "out_tensor_ch *
  9476. * (3 * ker_dim * ker_dim + 1)".
  9477. * @return This function only returns 0.
  9478. *
  9479. * @note
  9480. * The outputs will be 2-stage shifted before being stored, i.e.,
  9481. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9482. */
  9483. static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(const u8_t *in_tensor,
  9484. const uint16_t in_tensor_dim,
  9485. const q7_t *ker_weight,
  9486. const uint16_t out_tensor_ch,
  9487. const uint16_t ker_dim,
  9488. const uint16_t pad,
  9489. const uint16_t stride,
  9490. const uint16_t pre_rshift,
  9491. const uint16_t out_scale,
  9492. const uint16_t post_rshift,
  9493. q15_t *out_tensor,
  9494. const uint16_t out_tensor_dim,
  9495. q15_t *in_tmp_buf,
  9496. q15_t *wt_tmp_buf)
  9497. {
  9498. #if defined(__zcc__)
  9499. return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
  9500. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9501. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9502. in_tmp_buf, wt_tmp_buf);
  9503. #else
  9504. return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
  9505. in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
  9506. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
  9507. in_tmp_buf, wt_tmp_buf);
  9508. #endif
  9509. }
  9510. /**
  9511. * @brief This function performs fast convolution for signed 8-bit
  9512. * integer inputs/outputs with bias inputs and symmetric
  9513. * quantization on the outputs.
  9514. * @param[in] in_tensor pointer of the input vector
  9515. * @param[in] in_tensor_dim dimension of the input tensor
  9516. * @param[in] in_tensor_ch number of input tensor channels
  9517. * @param[in] ker_weight pointer of kernel weights
  9518. * @param[in] out_tensor_ch number of output tensor channels
  9519. * @param[in] ker_dim dimension of the filter kernel
  9520. * @param[in] pad padding size
  9521. * @param[in] stride convolution stride
  9522. * @param[in] bias pointer of the bias vector
  9523. * @param[in] pre_rshift right shift amount for the output
  9524. * @param[in] out_scale value of scaling for the output
  9525. * @param[in] post_rshift right shift amount for the output
  9526. * @param[out] out_tensor pointer of the output tensor
  9527. * @param[in] out_tensor_dim dimension of the output tensor
  9528. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9529. * required when -mext-dsp or -mext-vector
  9530. * enabled and its size must be equal to "2 *
  9531. * in_tensor_ch * ker_dim * ker_dim".
  9532. * @return This function returns 0 on success; otherwise, it returns -1
  9533. * if its inputs do not meet the constraints that in_tensor_ch
  9534. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9535. *
  9536. * @note
  9537. * The outputs will be 2-stage shifted before being stored, i.e.,
  9538. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9539. */
  9540. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast(const q7_t *in_tensor,
  9541. const uint16_t in_tensor_dim,
  9542. const uint16_t in_tensor_ch,
  9543. const q7_t *ker_weight,
  9544. const uint16_t out_tensor_ch,
  9545. const uint16_t ker_dim,
  9546. const uint16_t pad,
  9547. const uint16_t stride,
  9548. const q31_t *bias,
  9549. const uint16_t pre_rshift,
  9550. const uint16_t out_scale,
  9551. const uint16_t post_rshift,
  9552. q7_t *out_tensor,
  9553. const uint16_t out_tensor_dim,
  9554. q15_t *in_tmp_buf)
  9555. {
  9556. #if defined(__zcc__)
  9557. return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
  9558. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9559. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9560. out_tensor, out_tensor_dim, in_tmp_buf);
  9561. #else
  9562. return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
  9563. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9564. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9565. out_tensor, out_tensor_dim, in_tmp_buf);
  9566. #endif
  9567. }
  9568. /**
  9569. * @brief This function performs fast convolution for signed 8-bit
  9570. * integer inputs and signed 16-bit integer outputs with bias
  9571. * inputs and symmetric quantization on the outputs.
  9572. * @param[in] in_tensor pointer of the input vector
  9573. * @param[in] in_tensor_dim dimension of the input tensor
  9574. * @param[in] in_tensor_ch number of input tensor channels
  9575. * @param[in] ker_weight pointer of kernel weights
  9576. * @param[in] out_tensor_ch number of output tensor channels
  9577. * @param[in] ker_dim dimension of the filter kernel
  9578. * @param[in] pad padding size
  9579. * @param[in] stride convolution stride
  9580. * @param[in] bias pointer of the bias vector
  9581. * @param[in] pre_rshift right shift amount for the output
  9582. * @param[in] out_scale value of scaling for the output
  9583. * @param[in] post_rshift right shift amount for the output
  9584. * @param[out] out_tensor pointer of the output tensor
  9585. * @param[in] out_tensor_dim dimension of the output tensor
  9586. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9587. * required when -mext-dsp or -mext-vector
  9588. * enabled and its size must be equal to "2 *
  9589. * in_tensor_ch * ker_dim * ker_dim".
  9590. * @return This function returns 0 on success; otherwise, it returns -1
  9591. * if its inputs do not meet the constraints that in_tensor_ch
  9592. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9593. *
  9594. * @note
  9595. * The outputs will be 2-stage shifted before being stored, i.e.,
  9596. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9597. */
  9598. static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast(const q7_t *in_tensor,
  9599. const uint16_t in_tensor_dim,
  9600. const uint16_t in_tensor_ch,
  9601. const q7_t *ker_weight,
  9602. const uint16_t out_tensor_ch,
  9603. const uint16_t ker_dim,
  9604. const uint16_t pad,
  9605. const uint16_t stride,
  9606. const q31_t *bias,
  9607. const uint16_t pre_rshift,
  9608. const uint16_t out_scale,
  9609. const uint16_t post_rshift,
  9610. q15_t *out_tensor,
  9611. const uint16_t out_tensor_dim,
  9612. q15_t *in_tmp_buf)
  9613. {
  9614. #if defined(__zcc__)
  9615. return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
  9616. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9617. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9618. out_tensor, out_tensor_dim, in_tmp_buf);
  9619. #else
  9620. return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
  9621. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9622. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9623. out_tensor, out_tensor_dim, in_tmp_buf);
  9624. #endif
  9625. }
  9626. /**
  9627. * @brief This function performs fast convolution for unsigned 8-bit
  9628. * integer inputs/outputs with bias inputs and symmetric
  9629. * quantization on the outputs.
  9630. * @param[in] in_tensor pointer of the input vector
  9631. * @param[in] in_tensor_dim dimension of the input tensor
  9632. * @param[in] in_tensor_ch number of input tensor channels
  9633. * @param[in] ker_weight pointer of kernel weights
  9634. * @param[in] out_tensor_ch number of output tensor channels
  9635. * @param[in] ker_dim dimension of the filter kernel
  9636. * @param[in] pad padding size
  9637. * @param[in] stride convolution stride
  9638. * @param[in] bias pointer of the bias vector
  9639. * @param[in] pre_rshift right shift amount for the output
  9640. * @param[in] out_scale value of scaling for the output
  9641. * @param[in] post_rshift right shift amount for the output
  9642. * @param[out] out_tensor pointer of the output tensor
  9643. * @param[in] out_tensor_dim dimension of the output tensor
  9644. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9645. * required when -mext-dsp or -mext-vector
  9646. * enabled and its size must be equal to "2 *
  9647. * in_tensor_ch * ker_dim * ker_dim".
  9648. * @return This function returns 0 on success; otherwise, it returns -1
  9649. * if its inputs do not meet the constraints that in_tensor_ch
  9650. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9651. *
  9652. * @note
  9653. * The outputs will be 2-stage shifted before being stored, i.e.,
  9654. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9655. */
  9656. static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast(const u8_t *in_tensor,
  9657. const uint16_t in_tensor_dim,
  9658. const uint16_t in_tensor_ch,
  9659. const q7_t *ker_weight,
  9660. const uint16_t out_tensor_ch,
  9661. const uint16_t ker_dim,
  9662. const uint16_t pad,
  9663. const uint16_t stride,
  9664. const q31_t *bias,
  9665. const uint16_t pre_rshift,
  9666. const uint16_t out_scale,
  9667. const uint16_t post_rshift,
  9668. u8_t *out_tensor,
  9669. const uint16_t out_tensor_dim,
  9670. q15_t *in_tmp_buf)
  9671. {
  9672. #if defined(__zcc__)
  9673. return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
  9674. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9675. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9676. out_tensor, out_tensor_dim, in_tmp_buf);
  9677. #else
  9678. return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
  9679. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9680. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9681. out_tensor, out_tensor_dim, in_tmp_buf);
  9682. #endif
  9683. }
  9684. /**
  9685. * @brief This function performs fast convolution for unsigned 8-bit
  9686. * integer inputs and signed 8-bit integer outputs with bias
  9687. * inputs and symmetric quantization on the outputs.
  9688. * @param[in] in_tensor pointer of the input vector
  9689. * @param[in] in_tensor_dim dimension of the input tensor
  9690. * @param[in] in_tensor_ch number of input tensor channels
  9691. * @param[in] ker_weight pointer of kernel weights
  9692. * @param[in] out_tensor_ch number of output tensor channels
  9693. * @param[in] ker_dim dimension of the filter kernel
  9694. * @param[in] pad padding size
  9695. * @param[in] stride convolution stride
  9696. * @param[in] bias pointer of the bias vector
  9697. * @param[in] pre_rshift right shift amount for the output
  9698. * @param[in] out_scale value of scaling for the output
  9699. * @param[in] post_rshift right shift amount for the output
  9700. * @param[out] out_tensor pointer of the output tensor
  9701. * @param[in] out_tensor_dim dimension of the output tensor
  9702. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9703. * required when -mext-dsp or -mext-vector
  9704. * enabled and its size must be equal to "2 *
  9705. * in_tensor_ch * ker_dim * ker_dim".
  9706. * @return This function returns 0 on success; otherwise, it returns -1
  9707. * if its inputs do not meet the constraints that in_tensor_ch
  9708. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9709. *
  9710. * @note
  9711. * The outputs will be 2-stage shifted before being stored, i.e.,
  9712. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9713. */
  9714. static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast(const u8_t *in_tensor,
  9715. const uint16_t in_tensor_dim,
  9716. const uint16_t in_tensor_ch,
  9717. const q7_t *ker_weight,
  9718. const uint16_t out_tensor_ch,
  9719. const uint16_t ker_dim,
  9720. const uint16_t pad,
  9721. const uint16_t stride,
  9722. const q31_t *bias,
  9723. const uint16_t pre_rshift,
  9724. const uint16_t out_scale,
  9725. const uint16_t post_rshift,
  9726. q7_t *out_tensor,
  9727. const uint16_t out_tensor_dim,
  9728. q15_t *in_tmp_buf)
  9729. {
  9730. #if defined(__zcc__)
  9731. return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
  9732. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9733. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9734. out_tensor, out_tensor_dim, in_tmp_buf);
  9735. #else
  9736. return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
  9737. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9738. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9739. out_tensor, out_tensor_dim, in_tmp_buf);
  9740. #endif
  9741. }
  9742. /**
  9743. * @brief This function performs fast convolution for unsigned 8-bit
  9744. * integer inputs and signed 16-bit integer outputs with bias
  9745. * inputs and symmetric quantization on the outputs.
  9746. * @param[in] in_tensor pointer of the input vector
  9747. * @param[in] in_tensor_dim dimension of the input tensor
  9748. * @param[in] in_tensor_ch number of input tensor channels
  9749. * @param[in] ker_weight pointer of kernel weights
  9750. * @param[in] out_tensor_ch number of output tensor channels
  9751. * @param[in] ker_dim dimension of the filter kernel
  9752. * @param[in] pad padding size
  9753. * @param[in] stride convolution stride
  9754. * @param[in] bias pointer of the bias vector
  9755. * @param[in] pre_rshift right shift amount for the output
  9756. * @param[in] out_scale value of scaling for the output
  9757. * @param[in] post_rshift right shift amount for the output
  9758. * @param[out] out_tensor pointer of the output tensor
  9759. * @param[in] out_tensor_dim dimension of the output tensor
  9760. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9761. * required when -mext-dsp or -mext-vector
  9762. * enabled and its size must be equal to "2 *
  9763. * in_tensor_ch * ker_dim * ker_dim".
  9764. * @return This function returns 0 on success; otherwise, it returns -1
  9765. * if its inputs do not meet the constraints that in_tensor_ch
  9766. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9767. *
  9768. * @note
  9769. * The outputs will be 2-stage shifted before being stored, i.e.,
  9770. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9771. */
  9772. static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast(const u8_t *in_tensor,
  9773. const uint16_t in_tensor_dim,
  9774. const uint16_t in_tensor_ch,
  9775. const q7_t *ker_weight,
  9776. const uint16_t out_tensor_ch,
  9777. const uint16_t ker_dim,
  9778. const uint16_t pad,
  9779. const uint16_t stride,
  9780. const q31_t *bias,
  9781. const uint16_t pre_rshift,
  9782. const uint16_t out_scale,
  9783. const uint16_t post_rshift,
  9784. q15_t *out_tensor,
  9785. const uint16_t out_tensor_dim,
  9786. q15_t *in_tmp_buf)
  9787. {
  9788. #if defined(__zcc__)
  9789. return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
  9790. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9791. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9792. out_tensor, out_tensor_dim, in_tmp_buf);
  9793. #else
  9794. return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
  9795. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9796. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  9797. out_tensor, out_tensor_dim, in_tmp_buf);
  9798. #endif
  9799. }
  9800. /**
  9801. * @brief This function performs fast convolution for signed 8-bit
  9802. * integer inputs/outputs with symmetric quantization on the
  9803. * outputs.
  9804. * @param[in] in_tensor pointer of the input vector
  9805. * @param[in] in_tensor_dim dimension of the input tensor
  9806. * @param[in] in_tensor_ch number of input tensor channels
  9807. * @param[in] ker_weight pointer of kernel weights
  9808. * @param[in] out_tensor_ch number of output tensor channels
  9809. * @param[in] ker_dim dimension of the filter kernel
  9810. * @param[in] pad padding size
  9811. * @param[in] stride convolution stride
  9812. * @param[in] pre_rshift right shift amount for the output
  9813. * @param[in] out_scale value of scaling for the output
  9814. * @param[in] post_rshift right shift amount for the output
  9815. * @param[out] out_tensor pointer of the output tensor
  9816. * @param[in] out_tensor_dim dimension of the output tensor
  9817. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9818. * required when -mext-dsp or -mext-vector
  9819. * enabled and its size must be equal to "2 *
  9820. * in_tensor_ch * ker_dim * ker_dim".
  9821. * @return This function returns 0 on success; otherwise, it returns -1
  9822. * if its inputs do not meet the constraints that in_tensor_ch
  9823. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9824. *
  9825. * @note
  9826. * The outputs will be 2-stage shifted before being stored, i.e.,
  9827. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9828. */
  9829. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast(const q7_t *in_tensor,
  9830. const uint16_t in_tensor_dim,
  9831. const uint16_t in_tensor_ch,
  9832. const q7_t *ker_weight,
  9833. const uint16_t out_tensor_ch,
  9834. const uint16_t ker_dim,
  9835. const uint16_t pad,
  9836. const uint16_t stride,
  9837. const uint16_t pre_rshift,
  9838. const uint16_t out_scale,
  9839. const uint16_t post_rshift,
  9840. q7_t *out_tensor,
  9841. const uint16_t out_tensor_dim,
  9842. q15_t *in_tmp_buf)
  9843. {
  9844. #if defined(__zcc__)
  9845. return tpt_nn_conv_HWC_s8_s8_s8_sym_fast(
  9846. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9847. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  9848. out_tensor_dim, in_tmp_buf);
  9849. #else
  9850. return riscv_nn_conv_HWC_s8_s8_s8_sym_fast(
  9851. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9852. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  9853. out_tensor_dim, in_tmp_buf);
  9854. #endif
  9855. }
  9856. /**
  9857. * @brief This function performs fast convolution for signed 8-bit
  9858. * integer inputs and signed 16-bit integer outputs with
  9859. * symmetric quantization on the outputs.
  9860. * @param[in] in_tensor pointer of the input vector
  9861. * @param[in] in_tensor_dim dimension of the input tensor
  9862. * @param[in] in_tensor_ch number of input tensor channels
  9863. * @param[in] ker_weight pointer of kernel weights
  9864. * @param[in] out_tensor_ch number of output tensor channels
  9865. * @param[in] ker_dim dimension of the filter kernel
  9866. * @param[in] pad padding size
  9867. * @param[in] stride convolution stride
  9868. * @param[in] pre_rshift right shift amount for the output
  9869. * @param[in] out_scale value of scaling for the output
  9870. * @param[in] post_rshift right shift amount for the output
  9871. * @param[out] out_tensor pointer of the output tensor
  9872. * @param[in] out_tensor_dim dimension of the output tensor
  9873. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9874. * required when -mext-dsp or -mext-vector
  9875. * enabled and its size must be equal to "2 *
  9876. * in_tensor_ch * ker_dim * ker_dim".
  9877. * @return This function returns 0 on success; otherwise, it returns -1
  9878. * if its inputs do not meet the constraints that in_tensor_ch
  9879. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9880. *
  9881. * @note
  9882. * The outputs will be 2-stage shifted before being stored, i.e.,
  9883. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9884. */
  9885. static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast(const q7_t *in_tensor,
  9886. const uint16_t in_tensor_dim,
  9887. const uint16_t in_tensor_ch,
  9888. const q7_t *ker_weight,
  9889. const uint16_t out_tensor_ch,
  9890. const uint16_t ker_dim,
  9891. const uint16_t pad,
  9892. const uint16_t stride,
  9893. const uint16_t pre_rshift,
  9894. const uint16_t out_scale,
  9895. const uint16_t post_rshift,
  9896. q15_t *out_tensor,
  9897. const uint16_t out_tensor_dim,
  9898. q15_t *in_tmp_buf)
  9899. {
  9900. #if defined(__zcc__)
  9901. return tpt_nn_conv_HWC_s8_s16_s8_sym_fast(
  9902. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9903. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  9904. out_tensor_dim, in_tmp_buf);
  9905. #else
  9906. return riscv_nn_conv_HWC_s8_s16_s8_sym_fast(
  9907. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9908. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  9909. out_tensor_dim, in_tmp_buf);
  9910. #endif
  9911. }
  9912. /**
  9913. * @brief This function performs fast convolution for unsigned 8-bit
  9914. * integer inputs/outputs with symmetric quantization on
  9915. * the outputs.
  9916. * @param[in] in_tensor pointer of the input vector
  9917. * @param[in] in_tensor_dim dimension of the input tensor
  9918. * @param[in] in_tensor_ch number of input tensor channels
  9919. * @param[in] ker_weight pointer of kernel weights
  9920. * @param[in] out_tensor_ch number of output tensor channels
  9921. * @param[in] ker_dim dimension of the filter kernel
  9922. * @param[in] pad padding size
  9923. * @param[in] stride convolution stride
  9924. * @param[in] pre_rshift right shift amount for the output
  9925. * @param[in] out_scale value of scaling for the output
  9926. * @param[in] post_rshift right shift amount for the output
  9927. * @param[out] out_tensor pointer of the output tensor
  9928. * @param[in] out_tensor_dim dimension of the output tensor
  9929. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9930. * required when -mext-dsp or -mext-vector
  9931. * enabled and its size must be equal to "2 *
  9932. * in_tensor_ch * ker_dim * ker_dim".
  9933. * @return This function returns 0 on success; otherwise, it returns -1
  9934. * if its inputs do not meet the constraints that in_tensor_ch
  9935. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9936. *
  9937. * @note
  9938. * The outputs will be 2-stage shifted before being stored, i.e.,
  9939. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9940. */
  9941. static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast(const u8_t *in_tensor,
  9942. const uint16_t in_tensor_dim,
  9943. const uint16_t in_tensor_ch,
  9944. const q7_t *ker_weight,
  9945. const uint16_t out_tensor_ch,
  9946. const uint16_t ker_dim,
  9947. const uint16_t pad,
  9948. const uint16_t stride,
  9949. const uint16_t pre_rshift,
  9950. const uint16_t out_scale,
  9951. const uint16_t post_rshift,
  9952. u8_t *out_tensor,
  9953. const uint16_t out_tensor_dim,
  9954. q15_t *in_tmp_buf)
  9955. {
  9956. #if defined(__zcc__)
  9957. return tpt_nn_conv_HWC_u8_u8_s8_sym_fast(
  9958. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9959. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  9960. out_tensor_dim, in_tmp_buf);
  9961. #else
  9962. return riscv_nn_conv_HWC_u8_u8_s8_sym_fast(
  9963. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  9964. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  9965. out_tensor_dim, in_tmp_buf);
  9966. #endif
  9967. }
  9968. /**
  9969. * @brief This function performs fast convolution for unsigned 8-bit
  9970. * integer inputs and signed 8-bit integer outputs with
  9971. * symmetric quantization on the outputs.
  9972. * @param[in] in_tensor pointer of the input vector
  9973. * @param[in] in_tensor_dim dimension of the input tensor
  9974. * @param[in] in_tensor_ch number of input tensor channels
  9975. * @param[in] ker_weight pointer of kernel weights
  9976. * @param[in] out_tensor_ch number of output tensor channels
  9977. * @param[in] ker_dim dimension of the filter kernel
  9978. * @param[in] pad padding size
  9979. * @param[in] stride convolution stride
  9980. * @param[in] pre_rshift right shift amount for the output
  9981. * @param[in] out_scale value of scaling for the output
  9982. * @param[in] post_rshift right shift amount for the output
  9983. * @param[out] out_tensor pointer of the output tensor
  9984. * @param[in] out_tensor_dim dimension of the output tensor
  9985. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  9986. * required when -mext-dsp or -mext-vector
  9987. * enabled and its size must be equal to "2 *
  9988. * in_tensor_ch * ker_dim * ker_dim".
  9989. * @return This function returns 0 on success; otherwise, it returns -1
  9990. * if its inputs do not meet the constraints that in_tensor_ch
  9991. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  9992. *
  9993. * @note
  9994. * The outputs will be 2-stage shifted before being stored, i.e.,
  9995. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  9996. */
  9997. static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast(const u8_t *in_tensor,
  9998. const uint16_t in_tensor_dim,
  9999. const uint16_t in_tensor_ch,
  10000. const q7_t *ker_weight,
  10001. const uint16_t out_tensor_ch,
  10002. const uint16_t ker_dim,
  10003. const uint16_t pad,
  10004. const uint16_t stride,
  10005. const uint16_t pre_rshift,
  10006. const uint16_t out_scale,
  10007. const uint16_t post_rshift,
  10008. q7_t *out_tensor,
  10009. const uint16_t out_tensor_dim,
  10010. q15_t *in_tmp_buf)
  10011. {
  10012. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10013. return tpt_nn_conv_HWC_u8_s8_s8_sym_fast(
  10014. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10015. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  10016. out_tensor_dim, in_tmp_buf);
  10017. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10018. return riscv_nn_conv_HWC_u8_s8_s8_sym_fast(
  10019. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10020. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  10021. out_tensor_dim, in_tmp_buf);
  10022. #endif /* defined(__zcc__) */
  10023. }
  10024. /**
  10025. * @brief This function performs fast convolution for unsigned 8-bit
  10026. * integer inputs and signed 16-bit integer outputs with
  10027. * symmetric quantization on the outputs.
  10028. * @param[in] in_tensor pointer of the input tensor
  10029. * @param[in] in_tensor_dim dimension of the input tensor
  10030. * @param[in] in_tensor_ch number of input tensor channels
  10031. * @param[in] ker_weight pointer of kernel weights
  10032. * @param[in] out_tensor_ch number of output tensor channels
  10033. * @param[in] ker_dim dimension of the filter kernel
  10034. * @param[in] pad padding size
  10035. * @param[in] stride convolution stride
  10036. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10037. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10038. * @param[in] post_rshift right shift amount applied to the output after scaling
  10039. * @param[out] out_tensor pointer of the output tensor
  10040. * @param[in] out_tensor_dim dimension of the output tensor
  10041. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  10042. * required when -mext-dsp or -mext-vector is
  10043. * enabled and its size must be equal to "2 *
  10044. * in_tensor_ch * ker_dim * ker_dim".
  10045. * @return This function returns 0 on success; otherwise, it returns -1
  10046. * if its inputs do not meet the constraints that in_tensor_ch
  10047. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10048. *
  10049. * @note
  10050. * The outputs will be shifted in two stages before being stored, i.e.,
  10051. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10052. */
  10053. static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast(const u8_t *in_tensor,
  10054. const uint16_t in_tensor_dim,
  10055. const uint16_t in_tensor_ch,
  10056. const q7_t *ker_weight,
  10057. const uint16_t out_tensor_ch,
  10058. const uint16_t ker_dim,
  10059. const uint16_t pad,
  10060. const uint16_t stride,
  10061. const uint16_t pre_rshift,
  10062. const uint16_t out_scale,
  10063. const uint16_t post_rshift,
  10064. q15_t *out_tensor,
  10065. const uint16_t out_tensor_dim,
  10066. q15_t *in_tmp_buf)
  10067. {
  10068. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10069. return tpt_nn_conv_HWC_u8_s16_s8_sym_fast(
  10070. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10071. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  10072. out_tensor_dim, in_tmp_buf);
  10073. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10074. return riscv_nn_conv_HWC_u8_s16_s8_sym_fast(
  10075. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10076. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  10077. out_tensor_dim, in_tmp_buf);
  10078. #endif /* defined(__zcc__) */
  10079. }
  10080. /**
  10081. * @brief This function performs fast convolution for signed 8-bit
  10082. * integer inputs/outputs in any x and y dimensions with bias
  10083. * inputs and symmetric quantization on the outputs.
  10084. * @param[in] in_tensor pointer of the input tensor
  10085. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10086. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10087. * @param[in] in_tensor_ch number of input tensor channels
  10088. * @param[in] ker_weight pointer of kernel weights
  10089. * @param[in] out_tensor_ch number of output tensor channels
  10090. * @param[in] ker_dim_x x dimension of the filter kernel
  10091. * @param[in] ker_dim_y y dimension of the filter kernel
  10092. * @param[in] pad_x padding size in the x dimension
  10093. * @param[in] pad_y padding size in the y dimension
  10094. * @param[in] stride_x convolution stride in the x dimension
  10095. * @param[in] stride_y convolution stride in the y dimension
  10096. * @param[in] bias pointer of the bias vector
  10097. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10098. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10099. * @param[in] post_rshift right shift amount applied to the output after scaling
  10100. * @param[out] out_tensor pointer of the output tensor
  10101. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10102. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10103. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10104. * It is required when -mext-dsp or
  10105. * -mext-vector is enabled and its size must
  10106. * be equal to "2 * in_tensor_ch * ker_dim_x
  10107. * * ker_dim_y".
  10108. * @return This function returns 0 on success; otherwise, it returns -1
  10109. * if its inputs do not meet the constraints that in_tensor_ch
  10110. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10111. *
  10112. * @note
  10113. * The outputs will be shifted in two stages before being stored, i.e.,
  10114. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10115. */
  10116. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor,
  10117. const uint16_t in_tensor_dim_x,
  10118. const uint16_t in_tensor_dim_y,
  10119. const uint16_t in_tensor_ch,
  10120. const q7_t *ker_weight,
  10121. const uint16_t out_tensor_ch,
  10122. const uint16_t ker_dim_x,
  10123. const uint16_t ker_dim_y,
  10124. const uint16_t pad_x,
  10125. const uint16_t pad_y,
  10126. const uint16_t stride_x,
  10127. const uint16_t stride_y,
  10128. const q31_t *bias,
  10129. const uint16_t pre_rshift,
  10130. const uint16_t out_scale,
  10131. const uint16_t post_rshift,
  10132. q7_t *out_tensor,
  10133. const uint16_t out_tensor_dim_x,
  10134. const uint16_t out_tensor_dim_y,
  10135. q15_t *in_tmp_buf)
  10136. {
  10137. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10138. return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
  10139. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10140. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10141. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10142. out_tensor_dim_y, in_tmp_buf);
  10143. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10144. return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
  10145. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10146. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10147. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10148. out_tensor_dim_y, in_tmp_buf);
  10149. #endif /* defined(__zcc__) */
  10150. }
  10151. /**
  10152. * @brief This function performs fast convolution for signed 8-bit
  10153. * integer inputs and signed 16-bit integer outputs in any x
  10154. * and y dimensions with bias inputs and symmetric quantization
  10155. * on the outputs.
  10156. * @param[in] in_tensor pointer of the input tensor
  10157. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10158. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10159. * @param[in] in_tensor_ch number of input tensor channels
  10160. * @param[in] ker_weight pointer of kernel weights
  10161. * @param[in] out_tensor_ch number of output tensor channels
  10162. * @param[in] ker_dim_x x dimension of the filter kernel
  10163. * @param[in] ker_dim_y y dimension of the filter kernel
  10164. * @param[in] pad_x padding size in the x dimension
  10165. * @param[in] pad_y padding size in the y dimension
  10166. * @param[in] stride_x convolution stride in the x dimension
  10167. * @param[in] stride_y convolution stride in the y dimension
  10168. * @param[in] bias pointer of the bias vector
  10169. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10170. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10171. * @param[in] post_rshift right shift amount applied to the output after scaling
  10172. * @param[out] out_tensor pointer of the output tensor
  10173. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10174. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10175. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10176. * It is required when -mext-dsp or
  10177. * -mext-vector is enabled and its size must
  10178. * be equal to "2 * in_tensor_ch * ker_dim_x
  10179. * * ker_dim_y".
  10180. * @return This function returns 0 on success; otherwise, it returns -1
  10181. * if its inputs do not meet the constraints that in_tensor_ch
  10182. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10183. *
  10184. * @note
  10185. * The outputs will be shifted in two stages before being stored, i.e.,
  10186. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10187. */
  10188. static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor,
  10189. const uint16_t in_tensor_dim_x,
  10190. const uint16_t in_tensor_dim_y,
  10191. const uint16_t in_tensor_ch,
  10192. const q7_t *ker_weight,
  10193. const uint16_t out_tensor_ch,
  10194. const uint16_t ker_dim_x,
  10195. const uint16_t ker_dim_y,
  10196. const uint16_t pad_x,
  10197. const uint16_t pad_y,
  10198. const uint16_t stride_x,
  10199. const uint16_t stride_y,
  10200. const q31_t *bias,
  10201. const uint16_t pre_rshift,
  10202. const uint16_t out_scale,
  10203. const uint16_t post_rshift,
  10204. q15_t *out_tensor,
  10205. const uint16_t out_tensor_dim_x,
  10206. const uint16_t out_tensor_dim_y,
  10207. q15_t *in_tmp_buf)
  10208. {
  10209. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10210. return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
  10211. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10212. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10213. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10214. out_tensor_dim_y, in_tmp_buf);
  10215. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10216. return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
  10217. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10218. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10219. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10220. out_tensor_dim_y, in_tmp_buf);
  10221. #endif /* defined(__zcc__) */
  10222. }
  10223. /**
  10224. * @brief This function performs fast convolution for unsigned 8-bit
  10225. * integer inputs/outputs in any x and y dimensions with bias
  10226. * inputs and symmetric quantization on the outputs.
  10227. * @param[in] in_tensor pointer of the input tensor
  10228. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10229. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10230. * @param[in] in_tensor_ch number of input tensor channels
  10231. * @param[in] ker_weight pointer of kernel weights
  10232. * @param[in] out_tensor_ch number of output tensor channels
  10233. * @param[in] ker_dim_x x dimension of the filter kernel
  10234. * @param[in] ker_dim_y y dimension of the filter kernel
  10235. * @param[in] pad_x padding size in the x dimension
  10236. * @param[in] pad_y padding size in the y dimension
  10237. * @param[in] stride_x convolution stride in the x dimension
  10238. * @param[in] stride_y convolution stride in the y dimension
  10239. * @param[in] bias pointer of the bias vector
  10240. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10241. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10242. * @param[in] post_rshift right shift amount applied to the output after scaling
  10243. * @param[out] out_tensor pointer of the output tensor
  10244. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10245. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10246. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10247. * It is required when -mext-dsp or
  10248. * -mext-vector is enabled and its size must
  10249. * be equal to "2 * in_tensor_ch * ker_dim_x
  10250. * * ker_dim_y".
  10251. * @return This function returns 0 on success; otherwise, it returns -1
  10252. * if its inputs do not meet the constraints that in_tensor_ch
  10253. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10254. *
  10255. * @note
  10256. * The outputs will be shifted in two stages before being stored, i.e.,
  10257. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10258. */
  10259. static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor,
  10260. const uint16_t in_tensor_dim_x,
  10261. const uint16_t in_tensor_dim_y,
  10262. const uint16_t in_tensor_ch,
  10263. const q7_t *ker_weight,
  10264. const uint16_t out_tensor_ch,
  10265. const uint16_t ker_dim_x,
  10266. const uint16_t ker_dim_y,
  10267. const uint16_t pad_x,
  10268. const uint16_t pad_y,
  10269. const uint16_t stride_x,
  10270. const uint16_t stride_y,
  10271. const q31_t *bias,
  10272. const uint16_t pre_rshift,
  10273. const uint16_t out_scale,
  10274. const uint16_t post_rshift,
  10275. u8_t *out_tensor,
  10276. const uint16_t out_tensor_dim_x,
  10277. const uint16_t out_tensor_dim_y,
  10278. q15_t *in_tmp_buf)
  10279. {
  10280. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10281. return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
  10282. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10283. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10284. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10285. out_tensor_dim_y, in_tmp_buf);
  10286. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10287. return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
  10288. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10289. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10290. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10291. out_tensor_dim_y, in_tmp_buf);
  10292. #endif /* defined(__zcc__) */
  10293. }
  10294. /**
  10295. * @brief This function performs fast convolution for unsigned 8-bit
  10296. * integer inputs and signed 8-bit integer outputs in any x and
  10297. * y dimensions with bias inputs and symmetric quantization on
  10298. * the outputs.
  10299. * @param[in] in_tensor pointer of the input tensor
  10300. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10301. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10302. * @param[in] in_tensor_ch number of input tensor channels
  10303. * @param[in] ker_weight pointer of kernel weights
  10304. * @param[in] out_tensor_ch number of output tensor channels
  10305. * @param[in] ker_dim_x x dimension of the filter kernel
  10306. * @param[in] ker_dim_y y dimension of the filter kernel
  10307. * @param[in] pad_x padding size in the x dimension
  10308. * @param[in] pad_y padding size in the y dimension
  10309. * @param[in] stride_x convolution stride in the x dimension
  10310. * @param[in] stride_y convolution stride in the y dimension
  10311. * @param[in] bias pointer of the bias vector
  10312. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10313. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10314. * @param[in] post_rshift right shift amount applied to the output after scaling
  10315. * @param[out] out_tensor pointer of the output tensor
  10316. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10317. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10318. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10319. * It is required when -mext-dsp or
  10320. * -mext-vector is enabled and its size must
  10321. * be equal to "2 * in_tensor_ch * ker_dim_x
  10322. * * ker_dim_y".
  10323. * @return This function returns 0 on success; otherwise, it returns -1
  10324. * if its inputs do not meet the constraints that in_tensor_ch
  10325. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10326. *
  10327. * @note
  10328. * The outputs will be shifted in two stages before being stored, i.e.,
  10329. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10330. */
  10331. static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor,
  10332. const uint16_t in_tensor_dim_x,
  10333. const uint16_t in_tensor_dim_y,
  10334. const uint16_t in_tensor_ch,
  10335. const q7_t *ker_weight,
  10336. const uint16_t out_tensor_ch,
  10337. const uint16_t ker_dim_x,
  10338. const uint16_t ker_dim_y,
  10339. const uint16_t pad_x,
  10340. const uint16_t pad_y,
  10341. const uint16_t stride_x,
  10342. const uint16_t stride_y,
  10343. const q31_t *bias,
  10344. const uint16_t pre_rshift,
  10345. const uint16_t out_scale,
  10346. const uint16_t post_rshift,
  10347. q7_t *out_tensor,
  10348. const uint16_t out_tensor_dim_x,
  10349. const uint16_t out_tensor_dim_y,
  10350. q15_t *in_tmp_buf)
  10351. {
  10352. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10353. return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
  10354. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10355. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10356. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10357. out_tensor_dim_y, in_tmp_buf);
  10358. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10359. return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
  10360. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10361. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10362. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10363. out_tensor_dim_y, in_tmp_buf);
  10364. #endif /* defined(__zcc__) */
  10365. }
  10366. /**
  10367. * @brief This function performs fast convolution for unsigned 8-bit
  10368. * integer inputs and signed 16-bit integer outputs in any x
  10369. * and y dimensions with bias inputs and symmetric quantization
  10370. * on the outputs.
  10371. * @param[in] in_tensor pointer of the input tensor
  10372. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10373. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10374. * @param[in] in_tensor_ch number of input tensor channels
  10375. * @param[in] ker_weight pointer of kernel weights
  10376. * @param[in] out_tensor_ch number of output tensor channels
  10377. * @param[in] ker_dim_x x dimension of the filter kernel
  10378. * @param[in] ker_dim_y y dimension of the filter kernel
  10379. * @param[in] pad_x padding size in the x dimension
  10380. * @param[in] pad_y padding size in the y dimension
  10381. * @param[in] stride_x convolution stride in the x dimension
  10382. * @param[in] stride_y convolution stride in the y dimension
  10383. * @param[in] bias pointer of the bias vector
  10384. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10385. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10386. * @param[in] post_rshift right shift amount applied to the output after scaling
  10387. * @param[out] out_tensor pointer of the output tensor
  10388. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10389. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10390. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10391. * It is required when -mext-dsp or
  10392. * -mext-vector is enabled and its size must
  10393. * be equal to "2 * in_tensor_ch * ker_dim_x
  10394. * * ker_dim_y".
  10395. * @return This function returns 0 on success; otherwise, it returns -1
  10396. * if its inputs do not meet the constraints that in_tensor_ch
  10397. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10398. *
  10399. * @note
  10400. * The outputs will be shifted in two stages before being stored, i.e.,
  10401. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10402. */
  10403. static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor,
  10404. const uint16_t in_tensor_dim_x,
  10405. const uint16_t in_tensor_dim_y,
  10406. const uint16_t in_tensor_ch,
  10407. const q7_t *ker_weight,
  10408. const uint16_t out_tensor_ch,
  10409. const uint16_t ker_dim_x,
  10410. const uint16_t ker_dim_y,
  10411. const uint16_t pad_x,
  10412. const uint16_t pad_y,
  10413. const uint16_t stride_x,
  10414. const uint16_t stride_y,
  10415. const q31_t *bias,
  10416. const uint16_t pre_rshift,
  10417. const uint16_t out_scale,
  10418. const uint16_t post_rshift,
  10419. q15_t *out_tensor,
  10420. const uint16_t out_tensor_dim_x,
  10421. const uint16_t out_tensor_dim_y,
  10422. q15_t *in_tmp_buf)
  10423. {
  10424. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10425. return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
  10426. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10427. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10428. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10429. out_tensor_dim_y, in_tmp_buf);
  10430. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10431. return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
  10432. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10433. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10434. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10435. out_tensor_dim_y, in_tmp_buf);
  10436. #endif /* defined(__zcc__) */
  10437. }
  10438. /**
  10439. * @brief This function performs fast convolution for signed 8-bit
  10440. * integer inputs/outputs in any x and y dimensions with
  10441. * symmetric quantization on the outputs.
  10442. * @param[in] in_tensor pointer of the input tensor
  10443. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10444. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10445. * @param[in] in_tensor_ch number of input tensor channels
  10446. * @param[in] ker_weight pointer of kernel weights
  10447. * @param[in] out_tensor_ch number of output tensor channels
  10448. * @param[in] ker_dim_x x dimension of the filter kernel
  10449. * @param[in] ker_dim_y y dimension of the filter kernel
  10450. * @param[in] pad_x padding size in the x dimension
  10451. * @param[in] pad_y padding size in the y dimension
  10452. * @param[in] stride_x convolution stride in the x dimension
  10453. * @param[in] stride_y convolution stride in the y dimension
  10454. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10455. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10456. * @param[in] post_rshift right shift amount applied to the output after scaling
  10457. * @param[out] out_tensor pointer of the output tensor
  10458. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10459. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10460. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10461. * It is required when -mext-dsp or
  10462. * -mext-vector is enabled and its size must
  10463. * be equal to "2 * in_tensor_ch * ker_dim_x
  10464. * * ker_dim_y".
  10465. * @return This function returns 0 on success; otherwise, it returns -1
  10466. * if its inputs do not meet the constraints that in_tensor_ch
  10467. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10468. *
  10469. * @note
  10470. * The outputs will be shifted in two stages before being stored, i.e.,
  10471. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10472. */
  10473. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor,
  10474. const uint16_t in_tensor_dim_x,
  10475. const uint16_t in_tensor_dim_y,
  10476. const uint16_t in_tensor_ch,
  10477. const q7_t *ker_weight,
  10478. const uint16_t out_tensor_ch,
  10479. const uint16_t ker_dim_x,
  10480. const uint16_t ker_dim_y,
  10481. const uint16_t pad_x,
  10482. const uint16_t pad_y,
  10483. const uint16_t stride_x,
  10484. const uint16_t stride_y,
  10485. const uint16_t pre_rshift,
  10486. const uint16_t out_scale,
  10487. const uint16_t post_rshift,
  10488. q7_t *out_tensor,
  10489. const uint16_t out_tensor_dim_x,
  10490. const uint16_t out_tensor_dim_y,
  10491. q15_t *in_tmp_buf)
  10492. {
  10493. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10494. return tpt_nn_conv_HWC_s8_s8_s8_sym_fast_any(
  10495. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10496. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10497. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10498. out_tensor_dim_y, in_tmp_buf);
  10499. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10500. return riscv_nn_conv_HWC_s8_s8_s8_sym_fast_any(
  10501. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10502. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10503. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10504. out_tensor_dim_y, in_tmp_buf);
  10505. #endif /* defined(__zcc__) */
  10506. }
  10507. /**
  10508. * @brief This function performs fast convolution for signed 8-bit
  10509. * integer inputs and signed 16-bit integer outputs in any x
  10510. * and y dimensions with symmetric quantization on the outputs.
  10511. * @param[in] in_tensor pointer of the input tensor
  10512. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10513. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10514. * @param[in] in_tensor_ch number of input tensor channels
  10515. * @param[in] ker_weight pointer of kernel weights
  10516. * @param[in] out_tensor_ch number of output tensor channels
  10517. * @param[in] ker_dim_x x dimension of the filter kernel
  10518. * @param[in] ker_dim_y y dimension of the filter kernel
  10519. * @param[in] pad_x padding size in the x dimension
  10520. * @param[in] pad_y padding size in the y dimension
  10521. * @param[in] stride_x convolution stride in the x dimension
  10522. * @param[in] stride_y convolution stride in the y dimension
  10523. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10524. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10525. * @param[in] post_rshift right shift amount applied to the output after scaling
  10526. * @param[out] out_tensor pointer of the output tensor
  10527. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10528. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10529. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10530. * It is required when -mext-dsp or
  10531. * -mext-vector is enabled and its size must
  10532. * be equal to "2 * in_tensor_ch * ker_dim_x
  10533. * * ker_dim_y".
  10534. * @return This function returns 0 on success; otherwise, it returns -1
  10535. * if its inputs do not meet the constraints that in_tensor_ch
  10536. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10537. *
  10538. * @note
  10539. * The outputs will be shifted in two stages before being stored, i.e.,
  10540. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10541. */
  10542. static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor,
  10543. const uint16_t in_tensor_dim_x,
  10544. const uint16_t in_tensor_dim_y,
  10545. const uint16_t in_tensor_ch,
  10546. const q7_t *ker_weight,
  10547. const uint16_t out_tensor_ch,
  10548. const uint16_t ker_dim_x,
  10549. const uint16_t ker_dim_y,
  10550. const uint16_t pad_x,
  10551. const uint16_t pad_y,
  10552. const uint16_t stride_x,
  10553. const uint16_t stride_y,
  10554. const uint16_t pre_rshift,
  10555. const uint16_t out_scale,
  10556. const uint16_t post_rshift,
  10557. q15_t *out_tensor,
  10558. const uint16_t out_tensor_dim_x,
  10559. const uint16_t out_tensor_dim_y,
  10560. q15_t *in_tmp_buf)
  10561. {
  10562. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10563. return tpt_nn_conv_HWC_s8_s16_s8_sym_fast_any(
  10564. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10565. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10566. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10567. out_tensor_dim_y, in_tmp_buf);
  10568. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10569. return riscv_nn_conv_HWC_s8_s16_s8_sym_fast_any(
  10570. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10571. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10572. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10573. out_tensor_dim_y, in_tmp_buf);
  10574. #endif /* defined(__zcc__) */
  10575. }
  10576. /**
  10577. * @brief This function performs fast convolution for unsigned 8-bit
  10578. * integer inputs/outputs in any x and y dimensions with
  10579. * symmetric quantization on the outputs.
  10580. * @param[in] in_tensor pointer of the input tensor
  10581. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10582. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10583. * @param[in] in_tensor_ch number of input tensor channels
  10584. * @param[in] ker_weight pointer of kernel weights
  10585. * @param[in] out_tensor_ch number of output tensor channels
  10586. * @param[in] ker_dim_x x dimension of the filter kernel
  10587. * @param[in] ker_dim_y y dimension of the filter kernel
  10588. * @param[in] pad_x padding size in the x dimension
  10589. * @param[in] pad_y padding size in the y dimension
  10590. * @param[in] stride_x convolution stride in the x dimension
  10591. * @param[in] stride_y convolution stride in the y dimension
  10592. * @param[in] pre_rshift right shift amount applied to the output before scaling
  10593. * @param[in] out_scale scaling value multiplied into the pre-shifted output
  10594. * @param[in] post_rshift right shift amount applied to the output after scaling
  10595. * @param[out] out_tensor pointer of the output tensor
  10596. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10597. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10598. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10599. * It is required when -mext-dsp or
  10600. * -mext-vector is enabled and its size must
  10601. * be equal to "2 * in_tensor_ch * ker_dim_x
  10602. * * ker_dim_y".
  10603. * @return This function returns 0 on success; otherwise, it returns -1
  10604. * if its inputs do not meet the constraints that in_tensor_ch
  10605. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10606. *
  10607. * @note
  10608. * The outputs will be shifted in two stages before being stored, i.e.,
  10609. * out = ((out >> pre_rshift) * out_scale) >> post_rshift.
  10610. */
  10611. static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor,
  10612. const uint16_t in_tensor_dim_x,
  10613. const uint16_t in_tensor_dim_y,
  10614. const uint16_t in_tensor_ch,
  10615. const q7_t *ker_weight,
  10616. const uint16_t out_tensor_ch,
  10617. const uint16_t ker_dim_x,
  10618. const uint16_t ker_dim_y,
  10619. const uint16_t pad_x,
  10620. const uint16_t pad_y,
  10621. const uint16_t stride_x,
  10622. const uint16_t stride_y,
  10623. const uint16_t pre_rshift,
  10624. const uint16_t out_scale,
  10625. const uint16_t post_rshift,
  10626. u8_t *out_tensor,
  10627. const uint16_t out_tensor_dim_x,
  10628. const uint16_t out_tensor_dim_y,
  10629. q15_t *in_tmp_buf)
  10630. {
  10631. #if defined(__zcc__) /* __zcc__ defined: forward unchanged to the tpt_nn_* implementation */
  10632. return tpt_nn_conv_HWC_u8_u8_s8_sym_fast_any(
  10633. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10634. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10635. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10636. out_tensor_dim_y, in_tmp_buf);
  10637. #else /* otherwise forward unchanged to the riscv_nn_* implementation */
  10638. return riscv_nn_conv_HWC_u8_u8_s8_sym_fast_any(
  10639. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10640. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10641. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10642. out_tensor_dim_y, in_tmp_buf);
  10643. #endif /* defined(__zcc__) */
  10644. }
  10645. /**
  10646. * @brief This function performs fast convolution for unsigned 8-bit
  10647. * integer inputs and signed 8-bit integer outputs in any x and
  10648. * y dimensions with symmetric quantization on the outputs.
  10649. * @param[in] in_tensor pointer of the input vector
  10650. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10651. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10652. * @param[in] in_tensor_ch number of input tensor channels
  10653. * @param[in] ker_weight pointer of kernel weights
  10654. * @param[in] out_tensor_ch number of output tensor channels
  10655. * @param[in] ker_dim_x x dimension of the filter kernel
  10656. * @param[in] ker_dim_y y dimension of the filter kernel
  10657. * @param[in] pad_x padding size in the x dimension
  10658. * @param[in] pad_y padding size in the y dimension
  10659. * @param[in] stride_x convolution stride in the x dimension
  10660. * @param[in] stride_y convolution stride in the y dimension
  10661. * @param[in] pre_rshift right shift amount for the output
  10662. * @param[in] out_scale value of scaling for the output
  10663. * @param[in] post_rshift right shift amount for the output
  10664. * @param[out] out_tensor pointer of the output tensor
  10665. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10666. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10667. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10668. * It is required when -mext-dsp or
  10669. * -mext-vector enabled and its size must
  10670. * be equal to "2 * in_tensor_ch * ker_dim_x
  10671. * * ker_dim_y".
  10672. * @return This function returns 0 on success; otherwise, it returns -1
  10673. * if its inputs do not meet the constraints that in_tensor_ch
  10674. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10675. *
  10676. * @note
  10677. * The outputs will be 2-stage shifted before being stored, i.e.,
  10678. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  10679. */
  10680. static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor,
  10681. const uint16_t in_tensor_dim_x,
  10682. const uint16_t in_tensor_dim_y,
  10683. const uint16_t in_tensor_ch,
  10684. const q7_t *ker_weight,
  10685. const uint16_t out_tensor_ch,
  10686. const uint16_t ker_dim_x,
  10687. const uint16_t ker_dim_y,
  10688. const uint16_t pad_x,
  10689. const uint16_t pad_y,
  10690. const uint16_t stride_x,
  10691. const uint16_t stride_y,
  10692. const uint16_t pre_rshift,
  10693. const uint16_t out_scale,
  10694. const uint16_t post_rshift,
  10695. q7_t *out_tensor,
  10696. const uint16_t out_tensor_dim_x,
  10697. const uint16_t out_tensor_dim_y,
  10698. q15_t *in_tmp_buf)
  10699. {
  10700. #if defined(__zcc__)
  10701. return tpt_nn_conv_HWC_u8_s8_s8_sym_fast_any(
  10702. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10703. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10704. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10705. out_tensor_dim_y, in_tmp_buf);
  10706. #else
  10707. return riscv_nn_conv_HWC_u8_s8_s8_sym_fast_any(
  10708. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10709. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10710. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10711. out_tensor_dim_y, in_tmp_buf);
  10712. #endif
  10713. }
  10714. /**
  10715. * @brief This function performs fast convolution for unsigned 8-bit
  10716. * integer inputs and signed 16-bit integer outputs in any x
  10717. * and y dimensions with symmetric quantization on the outputs.
  10718. * @param[in] in_tensor pointer of the input vector
  10719. * @param[in] in_tensor_dim_x x dimension of the input tensor
  10720. * @param[in] in_tensor_dim_y y dimension of the input tensor
  10721. * @param[in] in_tensor_ch number of input tensor channels
  10722. * @param[in] ker_weight pointer of kernel weights
  10723. * @param[in] out_tensor_ch number of output tensor channels
  10724. * @param[in] ker_dim_x x dimension of the filter kernel
  10725. * @param[in] ker_dim_y y dimension of the filter kernel
  10726. * @param[in] pad_x padding size in the x dimension
  10727. * @param[in] pad_y padding size in the y dimension
  10728. * @param[in] stride_x convolution stride in the x dimension
  10729. * @param[in] stride_y convolution stride in the y dimension
  10730. * @param[in] pre_rshift right shift amount for the output
  10731. * @param[in] out_scale value of scaling for the output
  10732. * @param[in] post_rshift right shift amount for the output
  10733. * @param[out] out_tensor pointer of the output tensor
  10734. * @param[in] out_tensor_dim_x x dimension of the output tensor
  10735. * @param[in] out_tensor_dim_y y dimension of the output tensor
  10736. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  10737. * It is required when -mext-dsp or
  10738. * -mext-vector enabled and its size must
  10739. * be equal to "2 * in_tensor_ch * ker_dim_x
  10740. * * ker_dim_y".
  10741. * @return This function returns 0 on success; otherwise, it returns -1
  10742. * if its inputs do not meet the constraints that in_tensor_ch
  10743. * is a multiple of 4 and out_tensor_ch is a multiple of 2.
  10744. *
  10745. * @note
  10746. * The outputs will be 2-stage shifted before being stored, i.e.,
  10747. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  10748. */
  10749. static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor,
  10750. const uint16_t in_tensor_dim_x,
  10751. const uint16_t in_tensor_dim_y,
  10752. const uint16_t in_tensor_ch,
  10753. const q7_t *ker_weight,
  10754. const uint16_t out_tensor_ch,
  10755. const uint16_t ker_dim_x,
  10756. const uint16_t ker_dim_y,
  10757. const uint16_t pad_x,
  10758. const uint16_t pad_y,
  10759. const uint16_t stride_x,
  10760. const uint16_t stride_y,
  10761. const uint16_t pre_rshift,
  10762. const uint16_t out_scale,
  10763. const uint16_t post_rshift,
  10764. q15_t *out_tensor,
  10765. const uint16_t out_tensor_dim_x,
  10766. const uint16_t out_tensor_dim_y,
  10767. q15_t *in_tmp_buf)
  10768. {
  10769. #if defined(__zcc__)
  10770. return tpt_nn_conv_HWC_u8_s16_s8_sym_fast_any(
  10771. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10772. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10773. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10774. out_tensor_dim_y, in_tmp_buf);
  10775. #else
  10776. return riscv_nn_conv_HWC_u8_s16_s8_sym_fast_any(
  10777. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  10778. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  10779. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  10780. out_tensor_dim_y, in_tmp_buf);
  10781. #endif
  10782. }
  10783. /**
  10784. * @brief This function performs depthwise convolution for signed
  10785. * 8-bit integer inputs/outputs with bias inputs and symmetric
  10786. * quantization on the outputs.
  10787. * @param[in] in_tensor pointer of the input tensor
  10788. * @param[in] in_tensor_dim dimension of the input tensor
  10789. * @param[in] in_tensor_ch number of input tensor channels
  10790. * @param[in] ker_weight pointer of kernel weights
  10791. * @param[in] out_tensor_ch number of output tensor channels
  10792. * @param[in] ker_dim dimension of the filter kernel
  10793. * @param[in] pad padding size
  10794. * @param[in] stride convolution stride
  10795. * @param[in] bias pointer of the bias vector
  10796. * @param[in] pre_rshift right shift amount for the output
  10797. * @param[in] out_scale value of scaling for the output
  10798. * @param[in] post_rshift right shift amount for the output
  10799. * @param[out] out_tensor pointer of the output tensor
  10800. * @param[in] out_tensor_dim dimension of the output tensor
  10801. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  10802. * required when -mext-dsp or -mext-vector is
  10803. * enabled and its size must be equal to
  10804. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  10805. * @return This function returns 0 on success; otherwise, it returns -1
  10806. * if its inputs do not meet the constraints that in_tensor_ch
  10807. * has to be equal to out_tensor_ch.
  10808. *
  10809. * @note
  10810. * The outputs will be 2-stage shifted before being stored, i.e.,
  10811. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  10812. */
  10813. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias(const q7_t *in_tensor,
  10814. const uint16_t in_tensor_dim,
  10815. const uint16_t in_tensor_ch,
  10816. const q7_t *ker_weight,
  10817. const uint16_t out_tensor_ch,
  10818. const uint16_t ker_dim,
  10819. const uint16_t pad,
  10820. const uint16_t stride,
  10821. const q31_t *bias,
  10822. const uint16_t pre_rshift,
  10823. const uint16_t out_scale,
  10824. const uint16_t post_rshift,
  10825. q7_t *out_tensor,
  10826. const uint16_t out_tensor_dim,
  10827. q15_t *in_tmp_buf)
  10828. {
  10829. #if defined(__zcc__)
  10830. return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
  10831. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10832. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  10833. out_tensor, out_tensor_dim, in_tmp_buf);
  10834. #else
  10835. return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
  10836. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10837. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  10838. out_tensor, out_tensor_dim, in_tmp_buf);
  10839. #endif
  10840. }
  10841. /**
  10842. * @brief This function performs depthwise convolution for signed
  10843. * 8-bit integer inputs and signed 16-bit integer outputs with
  10844. * bias inputs and symmetric quantization on the outputs.
  10845. * @param[in] in_tensor pointer of the input tensor
  10846. * @param[in] in_tensor_dim dimension of the input tensor
  10847. * @param[in] in_tensor_ch number of input tensor channels
  10848. * @param[in] ker_weight pointer of kernel weights
  10849. * @param[in] out_tensor_ch number of output tensor channels
  10850. * @param[in] ker_dim dimension of the filter kernel
  10851. * @param[in] pad padding size
  10852. * @param[in] stride convolution stride
  10853. * @param[in] bias pointer of the bias vector
  10854. * @param[in] pre_rshift right shift amount for the output
  10855. * @param[in] out_scale value of scaling for the output
  10856. * @param[in] post_rshift right shift amount for the output
  10857. * @param[out] out_tensor pointer of the output tensor
  10858. * @param[in] out_tensor_dim dimension of the output tensor
  10859. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  10860. * required when -mext-dsp or -mext-vector is
  10861. * enabled and its size must be equal to
  10862. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  10863. * @return This function returns 0 on success; otherwise, it returns -1
  10864. * if its inputs do not meet the constraints that in_tensor_ch
  10865. * has to be equal to out_tensor_ch.
  10866. *
  10867. * @note
  10868. * The outputs will be 2-stage shifted before being stored, i.e.,
  10869. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  10870. */
  10871. static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias(const q7_t *in_tensor,
  10872. const uint16_t in_tensor_dim,
  10873. const uint16_t in_tensor_ch,
  10874. const q7_t *ker_weight,
  10875. const uint16_t out_tensor_ch,
  10876. const uint16_t ker_dim,
  10877. const uint16_t pad,
  10878. const uint16_t stride,
  10879. const q31_t *bias,
  10880. const uint16_t pre_rshift,
  10881. const uint16_t out_scale,
  10882. const uint16_t post_rshift,
  10883. q15_t *out_tensor,
  10884. const uint16_t out_tensor_dim,
  10885. q15_t *in_tmp_buf)
  10886. {
  10887. #if defined(__zcc__)
  10888. return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
  10889. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10890. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  10891. out_tensor, out_tensor_dim, in_tmp_buf);
  10892. #else
  10893. return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
  10894. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10895. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  10896. out_tensor, out_tensor_dim, in_tmp_buf);
  10897. #endif
  10898. }
  10899. /**
  10900. * @brief This function performs depthwise convolution for unsigned
  10901. * 8-bit integer inputs/outputs with bias inputs and symmetric
  10902. * quantization on the outputs.
  10903. * @param[in] in_tensor pointer of the input tensor
  10904. * @param[in] in_tensor_dim dimension of the input tensor
  10905. * @param[in] in_tensor_ch number of input tensor channels
  10906. * @param[in] ker_weight pointer of kernel weights
  10907. * @param[in] out_tensor_ch number of output tensor channels
  10908. * @param[in] ker_dim dimension of the filter kernel
  10909. * @param[in] pad padding size
  10910. * @param[in] stride convolution stride
  10911. * @param[in] bias pointer of the bias vector
  10912. * @param[in] pre_rshift right shift amount for the output
  10913. * @param[in] out_scale value of scaling for the output
  10914. * @param[in] post_rshift right shift amount for the output
  10915. * @param[out] out_tensor pointer of the output tensor
  10916. * @param[in] out_tensor_dim dimension of the output tensor
  10917. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  10918. * required when -mext-dsp or -mext-vector is
  10919. * enabled and its size must be equal to
  10920. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  10921. * @return This function returns 0 on success; otherwise, it returns -1
  10922. * if its inputs do not meet the constraints that in_tensor_ch
  10923. * has to be equal to out_tensor_ch.
  10924. *
  10925. * @note
  10926. * The outputs will be 2-stage shifted before being stored, i.e.,
  10927. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  10928. */
  10929. static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias(const u8_t *in_tensor,
  10930. const uint16_t in_tensor_dim,
  10931. const uint16_t in_tensor_ch,
  10932. const q7_t *ker_weight,
  10933. const uint16_t out_tensor_ch,
  10934. const uint16_t ker_dim,
  10935. const uint16_t pad,
  10936. const uint16_t stride,
  10937. const q31_t *bias,
  10938. const uint16_t pre_rshift,
  10939. const uint16_t out_scale,
  10940. const uint16_t post_rshift,
  10941. u8_t *out_tensor,
  10942. const uint16_t out_tensor_dim,
  10943. q15_t *in_tmp_buf)
  10944. {
  10945. #if defined(__zcc__)
  10946. return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
  10947. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10948. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  10949. out_tensor, out_tensor_dim, in_tmp_buf);
  10950. #else
  10951. return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
  10952. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  10953. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  10954. out_tensor, out_tensor_dim, in_tmp_buf);
  10955. #endif
  10956. }
  10957. /**
  10958. * @brief This function performs depthwise convolution for unsigned
  10959. * 8-bit integer inputs and signed 8-bit integer outputs with
  10960. * bias inputs and symmetric quantization on the outputs.
  10961. * @param[in] in_tensor pointer of the input tensor
  10962. * @param[in] in_tensor_dim dimension of the input tensor
  10963. * @param[in] in_tensor_ch number of input tensor channels
  10964. * @param[in] ker_weight pointer of kernel weights
  10965. * @param[in] out_tensor_ch number of output tensor channels
  10966. * @param[in] ker_dim dimension of the filter kernel
  10967. * @param[in] pad padding size
  10968. * @param[in] stride convolution stride
  10969. * @param[in] bias pointer of the bias vector
  10970. * @param[in] pre_rshift right shift amount for the output
  10971. * @param[in] out_scale value of scaling for the output
  10972. * @param[in] post_rshift right shift amount for the output
  10973. * @param[out] out_tensor pointer of the output tensor
  10974. * @param[in] out_tensor_dim dimension of the output tensor
  10975. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  10976. * required when -mext-dsp or -mext-vector is
  10977. * enabled and its size must be equal to
  10978. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  10979. * @return This function returns 0 on success; otherwise, it returns -1
  10980. * if its inputs do not meet the constraints that in_tensor_ch
  10981. * has to be equal to out_tensor_ch.
  10982. *
  10983. * @note
  10984. * The outputs will be 2-stage shifted before being stored, i.e.,
  10985. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  10986. */
  10987. static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias(const u8_t *in_tensor,
  10988. const uint16_t in_tensor_dim,
  10989. const uint16_t in_tensor_ch,
  10990. const q7_t *ker_weight,
  10991. const uint16_t out_tensor_ch,
  10992. const uint16_t ker_dim,
  10993. const uint16_t pad,
  10994. const uint16_t stride,
  10995. const q31_t *bias,
  10996. const uint16_t pre_rshift,
  10997. const uint16_t out_scale,
  10998. const uint16_t post_rshift,
  10999. q7_t *out_tensor,
  11000. const uint16_t out_tensor_dim,
  11001. q15_t *in_tmp_buf)
  11002. {
  11003. #if defined(__zcc__)
  11004. return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
  11005. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11006. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  11007. out_tensor, out_tensor_dim, in_tmp_buf);
  11008. #else
  11009. return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
  11010. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11011. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  11012. out_tensor, out_tensor_dim, in_tmp_buf);
  11013. #endif
  11014. }
  11015. /**
  11016. * @brief This function performs depthwise convolution for unsigned
  11017. * 8-bit integer inputs and signed 16-bit integer outputs with
  11018. * bias inputs and symmetric quantization on the outputs.
  11019. * @param[in] in_tensor pointer of the input tensor
  11020. * @param[in] in_tensor_dim dimension of the input tensor
  11021. * @param[in] in_tensor_ch number of input tensor channels
  11022. * @param[in] ker_weight pointer of kernel weights
  11023. * @param[in] out_tensor_ch number of output tensor channels
  11024. * @param[in] ker_dim dimension of the filter kernel
  11025. * @param[in] pad padding size
  11026. * @param[in] stride convolution stride
  11027. * @param[in] bias pointer of the bias vector
  11028. * @param[in] pre_rshift right shift amount for the output
  11029. * @param[in] out_scale value of scaling for the output
  11030. * @param[in] post_rshift right shift amount for the output
  11031. * @param[out] out_tensor pointer of the output tensor
  11032. * @param[in] out_tensor_dim dimension of the output tensor
  11033. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  11034. * required when -mext-dsp or -mext-vector is
  11035. * enabled and its size must be equal to
  11036. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  11037. * @return This function returns 0 on success; otherwise, it returns -1
  11038. * if its inputs do not meet the constraints that in_tensor_ch
  11039. * has to be equal to out_tensor_ch.
  11040. *
  11041. * @note
  11042. * The outputs will be 2-stage shifted before being stored, i.e.,
  11043. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11044. */
  11045. static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias(const u8_t *in_tensor,
  11046. const uint16_t in_tensor_dim,
  11047. const uint16_t in_tensor_ch,
  11048. const q7_t *ker_weight,
  11049. const uint16_t out_tensor_ch,
  11050. const uint16_t ker_dim,
  11051. const uint16_t pad,
  11052. const uint16_t stride,
  11053. const q31_t *bias,
  11054. const uint16_t pre_rshift,
  11055. const uint16_t out_scale,
  11056. const uint16_t post_rshift,
  11057. q15_t *out_tensor,
  11058. const uint16_t out_tensor_dim,
  11059. q15_t *in_tmp_buf)
  11060. {
  11061. #if defined(__zcc__)
  11062. return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
  11063. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11064. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  11065. out_tensor, out_tensor_dim, in_tmp_buf);
  11066. #else
  11067. return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
  11068. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11069. ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
  11070. out_tensor, out_tensor_dim, in_tmp_buf);
  11071. #endif
  11072. }
  11073. /**
  11074. * @brief This function performs depthwise convolution for signed
  11075. * 8-bit integer inputs/outputs with symmetric quantization on
  11076. * the outputs.
  11077. * @param[in] in_tensor pointer of the input tensor
  11078. * @param[in] in_tensor_dim dimension of the input tensor
  11079. * @param[in] in_tensor_ch number of input tensor channels
  11080. * @param[in] ker_weight pointer of kernel weights
  11081. * @param[in] out_tensor_ch number of output tensor channels
  11082. * @param[in] ker_dim dimension of the filter kernel
  11083. * @param[in] pad padding size
  11084. * @param[in] stride convolution stride
  11085. * @param[in] pre_rshift right shift amount for the output
  11086. * @param[in] out_scale value of scaling for the output
  11087. * @param[in] post_rshift right shift amount for the output
  11088. * @param[out] out_tensor pointer of the output tensor
  11089. * @param[in] out_tensor_dim dimension of the output tensor
  11090. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  11091. * required when -mext-dsp or -mext-vector is
  11092. * enabled and its size must be equal to
  11093. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  11094. * @return This function returns 0 on success; otherwise, it returns -1
  11095. * if its inputs do not meet the constraints that in_tensor_ch
  11096. * has to be equal to out_tensor_ch.
  11097. *
  11098. * @note
  11099. * The outputs will be 2-stage shifted before being stored, i.e.,
  11100. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11101. */
  11102. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym(const q7_t *in_tensor,
  11103. const uint16_t in_tensor_dim,
  11104. const uint16_t in_tensor_ch,
  11105. const q7_t *ker_weight,
  11106. const uint16_t out_tensor_ch,
  11107. const uint16_t ker_dim,
  11108. const uint16_t pad,
  11109. const uint16_t stride,
  11110. const uint16_t pre_rshift,
  11111. const uint16_t out_scale,
  11112. const uint16_t post_rshift,
  11113. q7_t *out_tensor,
  11114. const uint16_t out_tensor_dim,
  11115. q15_t *in_tmp_buf)
  11116. {
  11117. #if defined(__zcc__)
  11118. return tpt_nn_conv_dw_HWC_s8_s8_s8_sym(
  11119. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11120. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11121. out_tensor_dim, in_tmp_buf);
  11122. #else
  11123. return riscv_nn_conv_dw_HWC_s8_s8_s8_sym(
  11124. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11125. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11126. out_tensor_dim, in_tmp_buf);
  11127. #endif
  11128. }
  11129. /**
  11130. * @brief This function performs depthwise convolution for signed
  11131. * 8-bit integer inputs and signed 16-bit integer outputs with
  11132. * symmetric quantization on the outputs.
  11133. * @param[in] in_tensor pointer of the input tensor
  11134. * @param[in] in_tensor_dim dimension of the input tensor
  11135. * @param[in] in_tensor_ch number of input tensor channels
  11136. * @param[in] ker_weight pointer of kernel weights
  11137. * @param[in] out_tensor_ch number of output tensor channels
  11138. * @param[in] ker_dim dimension of the filter kernel
  11139. * @param[in] pad padding size
  11140. * @param[in] stride convolution stride
  11141. * @param[in] pre_rshift right shift amount for the output
  11142. * @param[in] out_scale value of scaling for the output
  11143. * @param[in] post_rshift right shift amount for the output
  11144. * @param[out] out_tensor pointer of the output tensor
  11145. * @param[in] out_tensor_dim dimension of the output tensor
  11146. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  11147. * required when -mext-dsp or -mext-vector is
  11148. * enabled and its size must be equal to
  11149. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  11150. * @return This function returns 0 on success; otherwise, it returns -1
  11151. * if its inputs do not meet the constraints that in_tensor_ch
  11152. * has to be equal to out_tensor_ch.
  11153. *
  11154. * @note
  11155. * The outputs will be 2-stage shifted before being stored, i.e.,
  11156. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11157. */
  11158. static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym(const q7_t *in_tensor,
  11159. const uint16_t in_tensor_dim,
  11160. const uint16_t in_tensor_ch,
  11161. const q7_t *ker_weight,
  11162. const uint16_t out_tensor_ch,
  11163. const uint16_t ker_dim,
  11164. const uint16_t pad,
  11165. const uint16_t stride,
  11166. const uint16_t pre_rshift,
  11167. const uint16_t out_scale,
  11168. const uint16_t post_rshift,
  11169. q15_t *out_tensor,
  11170. const uint16_t out_tensor_dim,
  11171. q15_t *in_tmp_buf)
  11172. {
  11173. #if defined(__zcc__)
  11174. return tpt_nn_conv_dw_HWC_s8_s16_s8_sym(
  11175. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11176. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11177. out_tensor_dim, in_tmp_buf);
  11178. #else
  11179. return riscv_nn_conv_dw_HWC_s8_s16_s8_sym(
  11180. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11181. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11182. out_tensor_dim, in_tmp_buf);
  11183. #endif
  11184. }
  11185. /**
  11186. * @brief This function performs depthwise convolution for unsigned
  11187. * 8-bit integer inputs/outputs with symmetric quantization on
  11188. * the outputs.
  11189. * @param[in] in_tensor pointer of the input tensor
  11190. * @param[in] in_tensor_dim dimension of the input tensor
  11191. * @param[in] in_tensor_ch number of input tensor channels
  11192. * @param[in] ker_weight pointer of kernel weights
  11193. * @param[in] out_tensor_ch number of output tensor channels
  11194. * @param[in] ker_dim dimension of the filter kernel
  11195. * @param[in] pad padding size
  11196. * @param[in] stride convolution stride
  11197. * @param[in] pre_rshift right shift amount for the output
  11198. * @param[in] out_scale value of scaling for the output
  11199. * @param[in] post_rshift right shift amount for the output
  11200. * @param[out] out_tensor pointer of the output tensor
  11201. * @param[in] out_tensor_dim dimension of the output tensor
  11202. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  11203. * required when -mext-dsp or -mext-vector is
  11204. * enabled and its size must be equal to
  11205. * (in_tensor_ch * ker_dim * ker_dim + 1) / 2.
  11206. * @return This function returns 0 on success; otherwise, it returns -1
  11207. * if its inputs do not meet the constraints that in_tensor_ch
  11208. * has to be equal to out_tensor_ch.
  11209. *
  11210. * @note
  11211. * The outputs will be 2-stage shifted before being stored, i.e.,
  11212. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11213. */
  11214. static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym(const u8_t *in_tensor,
  11215. const uint16_t in_tensor_dim,
  11216. const uint16_t in_tensor_ch,
  11217. const q7_t *ker_weight,
  11218. const uint16_t out_tensor_ch,
  11219. const uint16_t ker_dim,
  11220. const uint16_t pad,
  11221. const uint16_t stride,
  11222. const uint16_t pre_rshift,
  11223. const uint16_t out_scale,
  11224. const uint16_t post_rshift,
  11225. u8_t *out_tensor,
  11226. const uint16_t out_tensor_dim,
  11227. q15_t *in_tmp_buf)
  11228. {
  11229. #if defined(__zcc__)
  11230. return tpt_nn_conv_dw_HWC_u8_u8_s8_sym(
  11231. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11232. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11233. out_tensor_dim, in_tmp_buf);
  11234. #else
  11235. return riscv_nn_conv_dw_HWC_u8_u8_s8_sym(
  11236. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11237. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11238. out_tensor_dim, in_tmp_buf);
  11239. #endif
  11240. }
  11241. /**
  11242. * @brief This function performs depthwise convolution for unsigned
  11243. * 8-bit integer inputs and signed 8-bit integer outputs, and
  11244. * with symmetric quantization on the outputs.
  11245. * @param[in] in_tensor pointer of the input tensor
  11246. * @param[in] in_tensor_dim dimension of the input tensor
  11247. * @param[in] in_tensor_ch number of input tensor channels
  11248. * @param[in] ker_weight pointer of kernel weights
  11249. * @param[in] out_tensor_ch number of output tensor channels
  11250. * @param[in] ker_dim dimension of the filter kernel
  11251. * @param[in] pad padding size
  11252. * @param[in] stride convolution stride
  11253. * @param[in] pre_rshift right shift amount for the output
  11254. * @param[in] out_scale value of scaling for the output
  11255. * @param[in] post_rshift right shift amount for the output
  11256. * @param[out] out_tensor pointer of the output tensor
  11257. * @param[in] out_tensor_dim dimension of the output tensor
  11258. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  11259. * required when -mext-dsp or -mext-vector is
  11260. * enabled and its size must be equal to
  11261. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  11262. * @return This function returns 0 on success; otherwise, it returns -1
  11263. * if its inputs do not meet the constraints that in_tensor_ch
  11264. * has to be equal to out_tensor_ch.
  11265. *
  11266. * @note
  11267. * The outputs will be 2-stage shifted before being stored, i.e.,
  11268. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11269. */
  11270. static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym(const u8_t *in_tensor,
  11271. const uint16_t in_tensor_dim,
  11272. const uint16_t in_tensor_ch,
  11273. const q7_t *ker_weight,
  11274. const uint16_t out_tensor_ch,
  11275. const uint16_t ker_dim,
  11276. const uint16_t pad,
  11277. const uint16_t stride,
  11278. const uint16_t pre_rshift,
  11279. const uint16_t out_scale,
  11280. const uint16_t post_rshift,
  11281. q7_t *out_tensor,
  11282. const uint16_t out_tensor_dim,
  11283. q15_t *in_tmp_buf)
  11284. {
  11285. #if defined(__zcc__)
  11286. return tpt_nn_conv_dw_HWC_u8_s8_s8_sym(
  11287. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11288. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11289. out_tensor_dim, in_tmp_buf);
  11290. #else
  11291. return riscv_nn_conv_dw_HWC_u8_s8_s8_sym(
  11292. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11293. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11294. out_tensor_dim, in_tmp_buf);
  11295. #endif
  11296. }
  11297. /**
  11298. * @brief This function performs depthwise convolution for unsigned
  11299. * 8-bit integer inputs and signed 16-bit integer outputs with
  11300. * symmetric quantization on the outputs.
  11301. * @param[in] in_tensor pointer of the input tensor
  11302. * @param[in] in_tensor_dim dimension of the input tensor
  11303. * @param[in] in_tensor_ch number of input tensor channels
  11304. * @param[in] ker_weight pointer of kernel weights
  11305. * @param[in] out_tensor_ch number of output tensor channels
  11306. * @param[in] ker_dim dimension of the filter kernel
  11307. * @param[in] pad padding size
  11308. * @param[in] stride convolution stride
  11309. * @param[in] pre_rshift right shift amount for the output
  11310. * @param[in] out_scale value of scaling for the output
  11311. * @param[in] post_rshift right shift amount for the output
  11312. * @param[out] out_tensor pointer of the output tensor
  11313. * @param[in] out_tensor_dim dimension of the output tensor
  11314. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  11315. * required when -mext-dsp or -mext-vector is
  11316. * enabled and its size must be equal to
  11317. * "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
  11318. * @return This function returns 0 on success; otherwise, it returns -1
  11319. * if its inputs do not meet the constraints that in_tensor_ch
  11320. * has to be equal to out_tensor_ch.
  11321. *
  11322. * @note
  11323. * The outputs will be 2-stage shifted before being stored, i.e.,
  11324. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11325. */
  11326. static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym(const u8_t *in_tensor,
  11327. const uint16_t in_tensor_dim,
  11328. const uint16_t in_tensor_ch,
  11329. const q7_t *ker_weight,
  11330. const uint16_t out_tensor_ch,
  11331. const uint16_t ker_dim,
  11332. const uint16_t pad,
  11333. const uint16_t stride,
  11334. const uint16_t pre_rshift,
  11335. const uint16_t out_scale,
  11336. const uint16_t post_rshift,
  11337. q15_t *out_tensor,
  11338. const uint16_t out_tensor_dim,
  11339. q15_t *in_tmp_buf)
  11340. {
  11341. #if defined(__zcc__)
  11342. return tpt_nn_conv_dw_HWC_u8_s16_s8_sym(
  11343. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11344. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11345. out_tensor_dim, in_tmp_buf);
  11346. #else
  11347. return riscv_nn_conv_dw_HWC_u8_s16_s8_sym(
  11348. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  11349. ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
  11350. out_tensor_dim, in_tmp_buf);
  11351. #endif
  11352. }
  11353. /**
  11354. * @brief This function performs depthwise convolution for signed
  11355. * 8-bit integer inputs/outputs in any x and y dimensions with
  11356. * bias inputs and symmetric quantization on the outputs.
  11357. * @param[in] in_tensor pointer of the input tensor
  11358. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11359. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11360. * @param[in] in_tensor_ch number of input tensor channels
  11361. * @param[in] ker_weight pointer of kernel weights
  11362. * @param[in] out_tensor_ch number of output tensor channels
  11363. * @param[in] ker_dim_x x dimension of the filter kernel
  11364. * @param[in] ker_dim_y y dimension of the filter kernel
  11365. * @param[in] pad_x padding size in the x dimension
  11366. * @param[in] pad_y padding size in the y dimension
  11367. * @param[in] stride_x convolution stride in the x dimension
  11368. * @param[in] stride_y convolution stride in the y dimension
  11369. * @param[in] bias pointer of the bias vector
  11370. * @param[in] pre_rshift right shift amount for the output
  11371. * @param[in] out_scale value of scaling for the output
  11372. * @param[in] post_rshift right shift amount for the output
  11373. * @param[out] out_tensor pointer of the output tensor
  11374. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11375. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11376. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11377. * It is required when -mext-dsp or
  11378. * -mext-vector is enabled and its size
  11379. * must be equal to "(in_tensor_ch *
  11380. * ker_dim_x * ker_dim_y + 1) / 2".
  11381. * @return This function returns 0 on success; otherwise, it returns -1
  11382. * if its inputs do not meet the constraints that in_tensor_ch
  11383. * must be equal to out_tensor_ch.
  11384. *
  11385. * @note
  11386. * The outputs will be 2-stage shifted before being stored, i.e.,
  11387. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11388. */
  11389. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(const q7_t *in_tensor,
  11390. const uint16_t in_tensor_dim_x,
  11391. const uint16_t in_tensor_dim_y,
  11392. const uint16_t in_tensor_ch,
  11393. const q7_t *ker_weight,
  11394. const uint16_t out_tensor_ch,
  11395. const uint16_t ker_dim_x,
  11396. const uint16_t ker_dim_y,
  11397. const uint16_t pad_x,
  11398. const uint16_t pad_y,
  11399. const uint16_t stride_x,
  11400. const uint16_t stride_y,
  11401. const q31_t *bias,
  11402. const uint16_t pre_rshift,
  11403. const uint16_t out_scale,
  11404. const uint16_t post_rshift,
  11405. q7_t *out_tensor,
  11406. const uint16_t out_tensor_dim_x,
  11407. const uint16_t out_tensor_dim_y,
  11408. q15_t *in_tmp_buf)
  11409. {
  11410. #if defined(__zcc__)
  11411. return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
  11412. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11413. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11414. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11415. out_tensor_dim_y, in_tmp_buf);
  11416. #else
  11417. return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
  11418. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11419. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11420. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11421. out_tensor_dim_y, in_tmp_buf);
  11422. #endif
  11423. }
  11424. /**
  11425. * @brief This function performs depthwise convolution for signed
  11426. * 8-bit integer inputs and signed 16-bit integer outputs in
  11427. * any x and y dimensions with bias inputs and symmetric
  11428. * quantization on the outputs.
  11429. * @param[in] in_tensor pointer of the input tensor
  11430. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11431. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11432. * @param[in] in_tensor_ch number of input tensor channels
  11433. * @param[in] ker_weight pointer of kernel weights
  11434. * @param[in] out_tensor_ch number of output tensor channels
  11435. * @param[in] ker_dim_x x dimension of the filter kernel
  11436. * @param[in] ker_dim_y y dimension of the filter kernel
  11437. * @param[in] pad_x padding size in the x dimension
  11438. * @param[in] pad_y padding size in the y dimension
  11439. * @param[in] stride_x convolution stride in the x dimension
  11440. * @param[in] stride_y convolution stride in the y dimension
  11441. * @param[in] bias pointer of the bias vector
  11442. * @param[in] pre_rshift right shift amount for the output
  11443. * @param[in] out_scale value of scaling for the output
  11444. * @param[in] post_rshift right shift amount for the output
  11445. * @param[out] out_tensor pointer of the output tensor
  11446. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11447. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11448. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11449. * It is required when -mext-dsp or
  11450. * -mext-vector is enabled and its size
  11451. * must be equal to "(in_tensor_ch *
  11452. * ker_dim_x * ker_dim_y + 1) / 2".
  11453. * @return This function returns 0 on success; otherwise, it returns -1
  11454. * if its inputs do not meet the constraints that in_tensor_ch
  11455. * must be equal to out_tensor_ch.
  11456. *
  11457. * @note
  11458. * The outputs will be 2-stage shifted before being stored, i.e.,
  11459. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11460. */
  11461. static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(const q7_t *in_tensor,
  11462. const uint16_t in_tensor_dim_x,
  11463. const uint16_t in_tensor_dim_y,
  11464. const uint16_t in_tensor_ch,
  11465. const q7_t *ker_weight,
  11466. const uint16_t out_tensor_ch,
  11467. const uint16_t ker_dim_x,
  11468. const uint16_t ker_dim_y,
  11469. const uint16_t pad_x,
  11470. const uint16_t pad_y,
  11471. const uint16_t stride_x,
  11472. const uint16_t stride_y,
  11473. const q31_t *bias,
  11474. const uint16_t pre_rshift,
  11475. const uint16_t out_scale,
  11476. const uint16_t post_rshift,
  11477. q15_t *out_tensor,
  11478. const uint16_t out_tensor_dim_x,
  11479. const uint16_t out_tensor_dim_y,
  11480. q15_t *in_tmp_buf)
  11481. {
  11482. #if defined(__zcc__)
  11483. return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
  11484. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11485. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11486. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11487. out_tensor_dim_y, in_tmp_buf);
  11488. #else
  11489. return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
  11490. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11491. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11492. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11493. out_tensor_dim_y, in_tmp_buf);
  11494. #endif
  11495. }
  11496. /**
  11497. * @brief This function performs depthwise convolution for unsigned
  11498. * 8-bit integer inputs/outputs in any x and y dimensions with
  11499. * bias inputs and symmetric quantization on the outputs.
  11500. * @param[in] in_tensor pointer of the input tensor
  11501. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11502. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11503. * @param[in] in_tensor_ch number of input tensor channels
  11504. * @param[in] ker_weight pointer of kernel weights
  11505. * @param[in] out_tensor_ch number of output tensor channels
  11506. * @param[in] ker_dim_x x dimension of the filter kernel
  11507. * @param[in] ker_dim_y y dimension of the filter kernel
  11508. * @param[in] pad_x padding size in the x dimension
  11509. * @param[in] pad_y padding size in the y dimension
  11510. * @param[in] stride_x convolution stride in the x dimension
  11511. * @param[in] stride_y convolution stride in the y dimension
  11512. * @param[in] bias pointer of the bias vector
  11513. * @param[in] pre_rshift right shift amount for the output
  11514. * @param[in] out_scale value of scaling for the output
  11515. * @param[in] post_rshift right shift amount for the output
  11516. * @param[out] out_tensor pointer of the output tensor
  11517. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11518. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11519. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11520. * It is required when -mext-dsp or
  11521. * -mext-vector is enabled and its size
  11522. * must be equal to "(in_tensor_ch *
  11523. * ker_dim_x * ker_dim_y + 1) / 2".
  11524. * @return This function returns 0 on success; otherwise, it returns -1
  11525. * if its inputs do not meet the constraints that in_tensor_ch
  11526. * must be equal to out_tensor_ch.
  11527. *
  11528. * @note
  11529. * The outputs will be 2-stage shifted before being stored, i.e.,
  11530. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11531. */
  11532. static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(const u8_t *in_tensor,
  11533. const uint16_t in_tensor_dim_x,
  11534. const uint16_t in_tensor_dim_y,
  11535. const uint16_t in_tensor_ch,
  11536. const q7_t *ker_weight,
  11537. const uint16_t out_tensor_ch,
  11538. const uint16_t ker_dim_x,
  11539. const uint16_t ker_dim_y,
  11540. const uint16_t pad_x,
  11541. const uint16_t pad_y,
  11542. const uint16_t stride_x,
  11543. const uint16_t stride_y,
  11544. const q31_t *bias,
  11545. const uint16_t pre_rshift,
  11546. const uint16_t out_scale,
  11547. const uint16_t post_rshift,
  11548. u8_t *out_tensor,
  11549. const uint16_t out_tensor_dim_x,
  11550. const uint16_t out_tensor_dim_y,
  11551. q15_t *in_tmp_buf)
  11552. {
  11553. #if defined(__zcc__)
  11554. return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
  11555. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11556. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11557. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11558. out_tensor_dim_y, in_tmp_buf);
  11559. #else
  11560. return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
  11561. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11562. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11563. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11564. out_tensor_dim_y, in_tmp_buf);
  11565. #endif
  11566. }
  11567. /**
  11568. * @brief This function performs depthwise convolution for unsigned
  11569. * 8-bit integer inputs and signed 8-bit integer outputs in any
  11570. * x and y dimensions with bias inputs and symmetric
  11571. * quantization on the outputs.
  11572. * @param[in] in_tensor pointer of the input tensor
  11573. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11574. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11575. * @param[in] in_tensor_ch number of input tensor channels
  11576. * @param[in] ker_weight pointer of kernel weights
  11577. * @param[in] out_tensor_ch number of output tensor channels
  11578. * @param[in] ker_dim_x x dimension of the filter kernel
  11579. * @param[in] ker_dim_y y dimension of the filter kernel
  11580. * @param[in] pad_x padding size in the x dimension
  11581. * @param[in] pad_y padding size in the y dimension
  11582. * @param[in] stride_x convolution stride in the x dimension
  11583. * @param[in] stride_y convolution stride in the y dimension
  11584. * @param[in] bias pointer of the bias vector
  11585. * @param[in] pre_rshift right shift amount for the output
  11586. * @param[in] out_scale value of scaling for the output
  11587. * @param[in] post_rshift right shift amount for the output
  11588. * @param[out] out_tensor pointer of the output tensor
  11589. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11590. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11591. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11592. * It is required when -mext-dsp or
  11593. * -mext-vector is enabled and its size
  11594. * must be equal to "(in_tensor_ch *
  11595. * ker_dim_x * ker_dim_y + 1) / 2".
  11596. * @return This function returns 0 on success; otherwise, it returns -1
  11597. * if its inputs do not meet the constraints that in_tensor_ch
  11598. * must be equal to out_tensor_ch.
  11599. *
  11600. * @note
  11601. * The outputs will be 2-stage shifted before being stored, i.e.,
  11602. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11603. */
  11604. static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(const u8_t *in_tensor,
  11605. const uint16_t in_tensor_dim_x,
  11606. const uint16_t in_tensor_dim_y,
  11607. const uint16_t in_tensor_ch,
  11608. const q7_t *ker_weight,
  11609. const uint16_t out_tensor_ch,
  11610. const uint16_t ker_dim_x,
  11611. const uint16_t ker_dim_y,
  11612. const uint16_t pad_x,
  11613. const uint16_t pad_y,
  11614. const uint16_t stride_x,
  11615. const uint16_t stride_y,
  11616. const q31_t *bias,
  11617. const uint16_t pre_rshift,
  11618. const uint16_t out_scale,
  11619. const uint16_t post_rshift,
  11620. q7_t *out_tensor,
  11621. const uint16_t out_tensor_dim_x,
  11622. const uint16_t out_tensor_dim_y,
  11623. q15_t *in_tmp_buf)
  11624. {
  11625. #if defined(__zcc__)
  11626. return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
  11627. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11628. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11629. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11630. out_tensor_dim_y, in_tmp_buf);
  11631. #else
  11632. return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
  11633. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11634. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11635. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11636. out_tensor_dim_y, in_tmp_buf);
  11637. #endif
  11638. }
  11639. /**
  11640. * @brief This function performs depthwise convolution for unsigned
  11641. * 8-bit integer inputs and signed 16-bit integer outputs in
  11642. * any x and y dimensions with bias inputs and symmetric
  11643. * quantization on the outputs.
  11644. * @param[in] in_tensor pointer of the input tensor
  11645. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11646. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11647. * @param[in] in_tensor_ch number of input tensor channels
  11648. * @param[in] ker_weight pointer of kernel weights
  11649. * @param[in] out_tensor_ch number of output tensor channels
  11650. * @param[in] ker_dim_x x dimension of the filter kernel
  11651. * @param[in] ker_dim_y y dimension of the filter kernel
  11652. * @param[in] pad_x padding size in the x dimension
  11653. * @param[in] pad_y padding size in the y dimension
  11654. * @param[in] stride_x convolution stride in the x dimension
  11655. * @param[in] stride_y convolution stride in the y dimension
  11656. * @param[in] bias pointer of the bias vector
  11657. * @param[in] pre_rshift right shift amount for the output
  11658. * @param[in] out_scale value of scaling for the output
  11659. * @param[in] post_rshift right shift amount for the output
  11660. * @param[out] out_tensor pointer of the output tensor
  11661. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11662. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11663. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11664. * It is required when -mext-dsp or
  11665. * -mext-vector is enabled and its size
  11666. * must be equal to "(in_tensor_ch *
  11667. * ker_dim_x * ker_dim_y + 1) / 2".
  11668. * @return This function returns 0 on success; otherwise, it returns -1
  11669. * if its inputs do not meet the constraints that in_tensor_ch
  11670. * must be equal to out_tensor_ch.
  11671. *
  11672. * @note
  11673. * The outputs will be 2-stage shifted before being stored, i.e.,
  11674. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11675. */
  11676. static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(const u8_t *in_tensor,
  11677. const uint16_t in_tensor_dim_x,
  11678. const uint16_t in_tensor_dim_y,
  11679. const uint16_t in_tensor_ch,
  11680. const q7_t *ker_weight,
  11681. const uint16_t out_tensor_ch,
  11682. const uint16_t ker_dim_x,
  11683. const uint16_t ker_dim_y,
  11684. const uint16_t pad_x,
  11685. const uint16_t pad_y,
  11686. const uint16_t stride_x,
  11687. const uint16_t stride_y,
  11688. const q31_t *bias,
  11689. const uint16_t pre_rshift,
  11690. const uint16_t out_scale,
  11691. const uint16_t post_rshift,
  11692. q15_t *out_tensor,
  11693. const uint16_t out_tensor_dim_x,
  11694. const uint16_t out_tensor_dim_y,
  11695. q15_t *in_tmp_buf)
  11696. {
  11697. #if defined(__zcc__)
  11698. return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
  11699. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11700. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11701. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11702. out_tensor_dim_y, in_tmp_buf);
  11703. #else
  11704. return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
  11705. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11706. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11707. bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11708. out_tensor_dim_y, in_tmp_buf);
  11709. #endif
  11710. }
  11711. /**
  11712. * @brief This function performs depthwise convolution for signed
  11713. * 8-bit integer inputs/outputs in any x and y dimensions with
  11714. * bias inputs and symmetric quantization on the outputs.
  11715. * @param[in] in_tensor pointer of the input tensor
  11716. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11717. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11718. * @param[in] in_tensor_ch number of input tensor channels
  11719. * @param[in] ker_weight pointer of kernel weights
  11720. * @param[in] out_tensor_ch number of output tensor channels
  11721. * @param[in] ker_dim_x x dimension of the filter kernel
  11722. * @param[in] ker_dim_y y dimension of the filter kernel
  11723. * @param[in] pad_x padding size in the x dimension
  11724. * @param[in] pad_y padding size in the y dimension
  11725. * @param[in] stride_x convolution stride in the x dimension
  11726. * @param[in] stride_y convolution stride in the y dimension
  11727. * @param[in] pre_rshift right shift amount for the output
  11728. * @param[in] out_scale value of scaling for the output
  11729. * @param[in] post_rshift right shift amount for the output
  11730. * @param[out] out_tensor pointer of the output tensor
  11731. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11732. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11733. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11734. * It is required when -mext-dsp or
  11735. * -mext-vector is enabled and its size
  11736. * must be equal to "(in_tensor_ch *
  11737. * ker_dim_x * ker_dim_y + 1) / 2".
  11738. * @return This function returns 0 on success; otherwise, it returns -1
  11739. * if its inputs do not meet the constraints that in_tensor_ch
  11740. * must be equal to out_tensor_ch.
  11741. *
  11742. * @note
  11743. * The outputs will be 2-stage shifted before being stored, i.e.,
  11744. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11745. */
  11746. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any(const q7_t *in_tensor,
  11747. const uint16_t in_tensor_dim_x,
  11748. const uint16_t in_tensor_dim_y,
  11749. const uint16_t in_tensor_ch,
  11750. const q7_t *ker_weight,
  11751. const uint16_t out_tensor_ch,
  11752. const uint16_t ker_dim_x,
  11753. const uint16_t ker_dim_y,
  11754. const uint16_t pad_x,
  11755. const uint16_t pad_y,
  11756. const uint16_t stride_x,
  11757. const uint16_t stride_y,
  11758. const uint16_t pre_rshift,
  11759. const uint16_t out_scale,
  11760. const uint16_t post_rshift,
  11761. q7_t *out_tensor,
  11762. const uint16_t out_tensor_dim_x,
  11763. const uint16_t out_tensor_dim_y,
  11764. q15_t *in_tmp_buf)
  11765. {
  11766. #if defined(__zcc__)
  11767. return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_any(
  11768. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11769. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11770. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11771. out_tensor_dim_y, in_tmp_buf);
  11772. #else
  11773. return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_any(
  11774. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11775. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11776. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11777. out_tensor_dim_y, in_tmp_buf);
  11778. #endif
  11779. }
  11780. /**
  11781. * @brief This function performs depthwise convolution for signed
  11782. * 8-bit integer inputs and signed 16-bit integer outputs in
  11783. * any x and y dimensions with symmetric quantization on the
  11784. * outputs.
  11785. * @param[in] in_tensor pointer of the input tensor
  11786. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11787. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11788. * @param[in] in_tensor_ch number of input tensor channels
  11789. * @param[in] ker_weight pointer of kernel weights
  11790. * @param[in] out_tensor_ch number of output tensor channels
  11791. * @param[in] ker_dim_x x dimension of the filter kernel
  11792. * @param[in] ker_dim_y y dimension of the filter kernel
  11793. * @param[in] pad_x padding size in the x dimension
  11794. * @param[in] pad_y padding size in the y dimension
  11795. * @param[in] stride_x convolution stride in the x dimension
  11796. * @param[in] stride_y convolution stride in the y dimension
  11797. * @param[in] pre_rshift right shift amount for the output
  11798. * @param[in] out_scale value of scaling for the output
  11799. * @param[in] post_rshift right shift amount for the output
  11800. * @param[out] out_tensor pointer of the output tensor
  11801. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11802. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11803. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11804. * It is required when -mext-dsp or
  11805. * -mext-vector is enabled and its size
  11806. * must be equal to "(in_tensor_ch *
  11807. * ker_dim_x * ker_dim_y + 1) / 2".
  11808. * @return This function returns 0 on success; otherwise, it returns -1
  11809. * if its inputs do not meet the constraints that in_tensor_ch
  11810. * must be equal to out_tensor_ch.
  11811. *
  11812. * @note
  11813. * The outputs will be 2-stage shifted before being stored, i.e.,
  11814. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11815. */
  11816. static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any(const q7_t *in_tensor,
  11817. const uint16_t in_tensor_dim_x,
  11818. const uint16_t in_tensor_dim_y,
  11819. const uint16_t in_tensor_ch,
  11820. const q7_t *ker_weight,
  11821. const uint16_t out_tensor_ch,
  11822. const uint16_t ker_dim_x,
  11823. const uint16_t ker_dim_y,
  11824. const uint16_t pad_x,
  11825. const uint16_t pad_y,
  11826. const uint16_t stride_x,
  11827. const uint16_t stride_y,
  11828. const uint16_t pre_rshift,
  11829. const uint16_t out_scale,
  11830. const uint16_t post_rshift,
  11831. q15_t *out_tensor,
  11832. const uint16_t out_tensor_dim_x,
  11833. const uint16_t out_tensor_dim_y,
  11834. q15_t *in_tmp_buf)
  11835. {
  11836. #if defined(__zcc__)
  11837. return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_any(
  11838. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11839. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11840. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11841. out_tensor_dim_y, in_tmp_buf);
  11842. #else
  11843. return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_any(
  11844. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11845. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11846. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11847. out_tensor_dim_y, in_tmp_buf);
  11848. #endif
  11849. }
  11850. /**
  11851. * @brief This function performs depthwise convolution for unsigned
  11852. * 8-bit integer inputs/outputs in any x and y dimensions with
  11853. * symmetric quantization on the outputs.
  11854. * @param[in] in_tensor pointer of the input tensor
  11855. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11856. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11857. * @param[in] in_tensor_ch number of input tensor channels
  11858. * @param[in] ker_weight pointer of kernel weights
  11859. * @param[in] out_tensor_ch number of output tensor channels
  11860. * @param[in] ker_dim_x x dimension of the filter kernel
  11861. * @param[in] ker_dim_y y dimension of the filter kernel
  11862. * @param[in] pad_x padding size in the x dimension
  11863. * @param[in] pad_y padding size in the y dimension
  11864. * @param[in] stride_x convolution stride in the x dimension
  11865. * @param[in] stride_y convolution stride in the y dimension
  11866. * @param[in] pre_rshift right shift amount for the output
  11867. * @param[in] out_scale value of scaling for the output
  11868. * @param[in] post_rshift right shift amount for the output
  11869. * @param[out] out_tensor pointer of the output tensor
  11870. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11871. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11872. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11873. * It is required when -mext-dsp or
  11874. * -mext-vector is enabled and its size
  11875. * must be equal to "(in_tensor_ch *
  11876. * ker_dim_x * ker_dim_y + 1) / 2".
  11877. * @return This function returns 0 on success; otherwise, it returns -1
  11878. * if its inputs do not meet the constraints that in_tensor_ch
  11879. * must be equal to out_tensor_ch.
  11880. *
  11881. * @note
  11882. * The outputs will be 2-stage shifted before being stored, i.e.,
  11883. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11884. */
  11885. static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any(const u8_t *in_tensor,
  11886. const uint16_t in_tensor_dim_x,
  11887. const uint16_t in_tensor_dim_y,
  11888. const uint16_t in_tensor_ch,
  11889. const q7_t *ker_weight,
  11890. const uint16_t out_tensor_ch,
  11891. const uint16_t ker_dim_x,
  11892. const uint16_t ker_dim_y,
  11893. const uint16_t pad_x,
  11894. const uint16_t pad_y,
  11895. const uint16_t stride_x,
  11896. const uint16_t stride_y,
  11897. const uint16_t pre_rshift,
  11898. const uint16_t out_scale,
  11899. const uint16_t post_rshift,
  11900. u8_t *out_tensor,
  11901. const uint16_t out_tensor_dim_x,
  11902. const uint16_t out_tensor_dim_y,
  11903. q15_t *in_tmp_buf)
  11904. {
  11905. #if defined(__zcc__)
  11906. return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_any(
  11907. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11908. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11909. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11910. out_tensor_dim_y, in_tmp_buf);
  11911. #else
  11912. return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_any(
  11913. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11914. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11915. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11916. out_tensor_dim_y, in_tmp_buf);
  11917. #endif
  11918. }
  11919. /**
  11920. * @brief This function performs depthwise convolution for unsigned
  11921. * 8-bit integer inputs and signed 8-bit integer outputs in any
  11922. * x and y dimensions with symmetric quantization on the
  11923. * outputs.
  11924. * @param[in] in_tensor pointer of the input tensor
  11925. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11926. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11927. * @param[in] in_tensor_ch number of input tensor channels
  11928. * @param[in] ker_weight pointer of kernel weights
  11929. * @param[in] out_tensor_ch number of output tensor channels
  11930. * @param[in] ker_dim_x x dimension of the filter kernel
  11931. * @param[in] ker_dim_y y dimension of the filter kernel
  11932. * @param[in] pad_x padding size in the x dimension
  11933. * @param[in] pad_y padding size in the y dimension
  11934. * @param[in] stride_x convolution stride in the x dimension
  11935. * @param[in] stride_y convolution stride in the y dimension
  11936. * @param[in] pre_rshift right shift amount for the output
  11937. * @param[in] out_scale value of scaling for the output
  11938. * @param[in] post_rshift right shift amount for the output
  11939. * @param[out] out_tensor pointer of the output tensor
  11940. * @param[in] out_tensor_dim_x x dimension of the output tensor
  11941. * @param[in] out_tensor_dim_y y dimension of the output tensor
  11942. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  11943. * It is required when -mext-dsp or
  11944. * -mext-vector is enabled and its size
  11945. * must be equal to "(in_tensor_ch *
  11946. * ker_dim_x * ker_dim_y + 1) / 2".
  11947. * @return This function returns 0 on success; otherwise, it returns -1
  11948. * if its inputs do not meet the constraints that in_tensor_ch
  11949. * must be equal to out_tensor_ch.
  11950. *
  11951. * @note
  11952. * The outputs will be 2-stage shifted before being stored, i.e.,
  11953. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  11954. */
  11955. static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any(const u8_t *in_tensor,
  11956. const uint16_t in_tensor_dim_x,
  11957. const uint16_t in_tensor_dim_y,
  11958. const uint16_t in_tensor_ch,
  11959. const q7_t *ker_weight,
  11960. const uint16_t out_tensor_ch,
  11961. const uint16_t ker_dim_x,
  11962. const uint16_t ker_dim_y,
  11963. const uint16_t pad_x,
  11964. const uint16_t pad_y,
  11965. const uint16_t stride_x,
  11966. const uint16_t stride_y,
  11967. const uint16_t pre_rshift,
  11968. const uint16_t out_scale,
  11969. const uint16_t post_rshift,
  11970. q7_t *out_tensor,
  11971. const uint16_t out_tensor_dim_x,
  11972. const uint16_t out_tensor_dim_y,
  11973. q15_t *in_tmp_buf)
  11974. {
  11975. #if defined(__zcc__)
  11976. return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_any(
  11977. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11978. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11979. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11980. out_tensor_dim_y, in_tmp_buf);
  11981. #else
  11982. return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_any(
  11983. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  11984. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  11985. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  11986. out_tensor_dim_y, in_tmp_buf);
  11987. #endif
  11988. }
  11989. /**
  11990. * @brief This function performs depthwise convolution for unsigned
  11991. * 8-bit integer inputs and signed 16-bit integer outputs in
  11992. * any x and y dimensions with symmetric quantization on the
  11993. * outputs.
  11994. * @param[in] in_tensor pointer of the input tensor
  11995. * @param[in] in_tensor_dim_x x dimension of the input tensor
  11996. * @param[in] in_tensor_dim_y y dimension of the input tensor
  11997. * @param[in] in_tensor_ch number of input tensor channels
  11998. * @param[in] ker_weight pointer of kernel weights
  11999. * @param[in] out_tensor_ch number of output tensor channels
  12000. * @param[in] ker_dim_x x dimension of the filter kernel
  12001. * @param[in] ker_dim_y y dimension of the filter kernel
  12002. * @param[in] pad_x padding size in the x dimension
  12003. * @param[in] pad_y padding size in the y dimension
  12004. * @param[in] stride_x convolution stride in the x dimension
  12005. * @param[in] stride_y convolution stride in the y dimension
  12006. * @param[in] pre_rshift right shift amount for the output
  12007. * @param[in] out_scale value of scaling for the output
  12008. * @param[in] post_rshift right shift amount for the output
  12009. * @param[out] out_tensor pointer of the output tensor
  12010. * @param[in] out_tensor_dim_x x dimension of the output tensor
  12011. * @param[in] out_tensor_dim_y y dimension of the output tensor
  12012. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  12013. * It is required when -mext-dsp or
  12014. * -mext-vector is enabled and its size
  12015. * must be equal to "(in_tensor_ch *
  12016. * ker_dim_x * ker_dim_y + 1) / 2".
  12017. * @return This function returns 0 on success; otherwise, it returns -1
  12018. * if its inputs do not meet the constraints that in_tensor_ch
  12019. * must be equal to out_tensor_ch.
  12020. *
  12021. * @note
  12022. * The outputs will be 2-stage shifted before being stored, i.e.,
  12023. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  12024. */
  12025. static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any(const u8_t *in_tensor,
  12026. const uint16_t in_tensor_dim_x,
  12027. const uint16_t in_tensor_dim_y,
  12028. const uint16_t in_tensor_ch,
  12029. const q7_t *ker_weight,
  12030. const uint16_t out_tensor_ch,
  12031. const uint16_t ker_dim_x,
  12032. const uint16_t ker_dim_y,
  12033. const uint16_t pad_x,
  12034. const uint16_t pad_y,
  12035. const uint16_t stride_x,
  12036. const uint16_t stride_y,
  12037. const uint16_t pre_rshift,
  12038. const uint16_t out_scale,
  12039. const uint16_t post_rshift,
  12040. q15_t *out_tensor,
  12041. const uint16_t out_tensor_dim_x,
  12042. const uint16_t out_tensor_dim_y,
  12043. q15_t *in_tmp_buf)
  12044. {
  12045. #if defined(__zcc__)
  12046. return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_any(
  12047. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  12048. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  12049. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  12050. out_tensor_dim_y, in_tmp_buf);
  12051. #else
  12052. return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_any(
  12053. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  12054. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  12055. pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
  12056. out_tensor_dim_y, in_tmp_buf);
  12057. #endif
  12058. }
  12059. /**
  12060. * @brief This function performs 1x1 kernels convolution for signed
  12061. * 8-bit interger inputs/outputs in any x and y dimensions with
  12062. * asymmetric quantization on the outputs.
  12063. * @param[in] in_tensor pointer of the input tensor
  12064. * @param[in] in_tensor_dim_x x dimension of the input tensor
  12065. * @param[in] in_tensor_dim_y y dimension of the input tensor
  12066. * @param[in] in_tensor_ch number of input tensor channels
  12067. * @param[in] in_tensor_group number of input tensor groups
  12068. * @param[in] ker_weight pointer of kernel weights
  12069. * @param[in] out_tensor_ch number of output tensor channels
  12070. * @param[in] pad_x padding size in the x dimension
  12071. * @param[in] pad_y padding size in the y dimension
  12072. * @param[in] stride_x convolution stride in the x dimension
  12073. * @param[in] stride_y convolution stride in the y dimension
  12074. * @param[in] bias pointer of the bias vector
  12075. * @param[out] out_tensor pointer of the output tensor
  12076. * @param[in] out_shift pointer of the shift vector for output
  12077. * tensor
  12078. * @param[in] out_scale pointer of the scaling vector for output
  12079. * tensor
  12080. * @param[in] out_offset value of offset for the output tensor.
  12081. * It should be in the range of -128 to 127.
  12082. * @param[in] in_offset value of offset for the input tensor
  12083. * It should be in the range of -127 to 128.
  12084. * @param[in] act_min minimum value to clip out the ouput
  12085. * tensor. It should be in the range of
  12086. * -128 to 127.
  12087. * @param[in] act_max maximum value to clip out the ouput
  12088. * tensor. It should be in the range of
  12089. * -128 to 127.
  12090. * @param[in] out_tensor_dim_x x dimension of the output tensor
  12091. * @param[in] out_tensor_dim_y y dimension of the output tensor
  12092. * @param[in] tmp_buf dummy
  12093. * @return This function returns 0 on success; otherwise, it returns -1
  12094. * if its inputs do not meet the constraints (see the Note
  12095. * below for details).
  12096. *
  12097. * @note
  12098. * - The input constraints of this function are:
  12099. * - in_tensor_ch is a multiple of 4
  12100. * - pad_x is 0
  12101. * - pad_y is 0
  12102. * - stride_x is 1
  12103. * - stride_y is 1
  12104. */
  12105. static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
  12106. const uint16_t in_tensor_dim_x,
  12107. const uint16_t in_tensor_dim_y,
  12108. const uint16_t in_tensor_ch,
  12109. const uint16_t in_tensor_group,
  12110. const q7_t *ker_weight,
  12111. const uint16_t out_tensor_ch,
  12112. const uint16_t pad_x,
  12113. const uint16_t pad_y,
  12114. const uint16_t stride_x,
  12115. const uint16_t stride_y,
  12116. const int32_t *bias,
  12117. q7_t *out_tensor,
  12118. const int32_t *out_shift,
  12119. const int32_t *out_scale,
  12120. const int32_t out_offset,
  12121. const int32_t in_offset,
  12122. const int32_t act_min,
  12123. const int32_t act_max,
  12124. const uint16_t out_tensor_dim_x,
  12125. const uint16_t out_tensor_dim_y,
  12126. q15_t *tmp_buf)
  12127. {
  12128. #if defined(__zcc__)
  12129. tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
  12130. stride_y, pad_x, pad_y, act_min, act_max};
  12131. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  12132. tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  12133. in_tensor_group, out_tensor_ch};
  12134. return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
  12135. bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
  12136. #else
  12137. return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
  12138. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  12139. in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
  12140. stride_y, bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
  12141. act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
  12142. #endif
  12143. }
  /**
   * @brief Get the size, in bytes, needed by the temporary buffer of
   *        hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any().
   *
   * Forwards to tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size() when
   * __zcc__ is defined, and to
   * riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size()
   * otherwise.
   *
   * @param[in] in_tensor_ch  number of input tensor channels
   *
   * @return The size needed by the temporary buffer.
   */
  static inline int32_t
  hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
      const uint16_t in_tensor_ch)
  {
      /* The stray "convol" token that followed this #if condition has been
       * removed; it made the preprocessor expression malformed. */
  #if defined(__zcc__)
      return tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(
          in_tensor_ch);
  #else
      return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
          in_tensor_ch);
  #endif
  }
  12161. /**
  12162. * @brief This function performs 1xn kernels convolution for signed
  12163. * 8-bit integer inputs/outputs in any x and y dimensions with
  12164. * asymmetric quantization on the outputs.
  12165. * @param[in] in_tensor pointer of the input tensor
  12166. * @param[in] in_tensor_dim_x x dimension of the input tensor
  12167. * @param[in] in_tensor_ch number of input tensor channels
  12168. * @param[in] in_tensor_group dummy
  12169. * @param[in] ker_weight pointer of kernel weights
  12170. * @param[in] out_tensor_ch number of output tensor channels
  12171. * @param[in] ker_dim_x x dimension of the filter kernel
  12172. * @param[in] pad_x padding size in the x dimension
  12173. * @param[in] stride_x convolution stride in the x dimension
  12174. * @param[in] bias pointer of the bias vector
  12175. * @param[out] out_tensor pointer of the output tensor
  12176. * @param[in] out_shift pointer of the shift vector for output
  12177. * tensor
  12178. * @param[in] out_scale pointer of the scaling vector for output
  12179. * tensor
  12180. * @param[in] out_offset value of offset for the output tensor.
  12181. * It should be in the range of -128 to 127.
  12182. * @param[in] in_offset value of offset for the input tensor
  12183. * It should be in the range of -127 to 128.
  12184. * @param[in] act_min minimum value to clip out the ouput
  12185. * tensor. It should be in the range of
  12186. * -128 to 127.
  12187. * @param[in] act_max maximum value to clip out the ouput
  12188. * tensor. It should be in the range of
  12189. * -128 to 127.
  12190. * @param[in] out_tensor_dim_x x dimension of the output tensor
  12191. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  12192. * It is required when -mext-dsp or
  12193. * -mext-vector is enabled and its needed
  12194. * size could be get by calling riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size.
  12195. * @return This function returns 0 on success; otherwise, it returns -1
  12196. * if its inputs do not meet the constraint that
  12197. * out_tensor_dim_x is a multiple of 4.
  12198. */
  12199. static inline int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
  12200. const uint16_t in_tensor_dim_x,
  12201. const uint16_t in_tensor_ch,
  12202. const uint16_t in_tensor_group,
  12203. const q7_t *ker_weight,
  12204. const uint16_t out_tensor_ch,
  12205. const uint16_t ker_dim_x,
  12206. const uint16_t pad_x,
  12207. const uint16_t stride_x,
  12208. const int32_t *bias,
  12209. q7_t *out_tensor,
  12210. const int32_t *out_shift,
  12211. const int32_t *out_scale,
  12212. const int32_t out_offset,
  12213. const int32_t in_offset,
  12214. const int32_t act_min,
  12215. const int32_t act_max,
  12216. const uint16_t out_tensor_dim_x,
  12217. q15_t *in_tmp_buf)
  12218. {
  12219. #if defined(__zcc__)
  12220. tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
  12221. act_min, act_max};
  12222. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  12223. tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
  12224. ker_dim_x, out_tensor_dim_x, out_tensor_ch};
  12225. return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
  12226. bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
  12227. #else
  12228. return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
  12229. in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
  12230. out_tensor_ch, ker_dim_x, pad_x, stride_x, bias, out_tensor, out_shift,
  12231. out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
  12232. in_tmp_buf);
  12233. #endif
  12234. }
  /**
   * @brief Get the size, in bytes, needed by the input temporary buffer of
   *        hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any().
   *
   * Forwards to tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size() when
   * __zcc__ is defined, and to
   * riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size() otherwise.
   *
   * @param[in] in_tensor_ch  number of input tensor channels
   * @param[in] ker_dim_x     x dimension of the filter kernel
   * @param[in] ker_dim_y     y dimension of the filter kernel. It is always 1
   *                          here.
   *
   * @return The size needed by the temporary buffer.
   */
  static inline int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
      const uint16_t in_tensor_ch,
      const uint16_t ker_dim_x,
      const uint16_t ker_dim_y)
  {
      /* Size reported by whichever backend is selected at compile time. */
      int32_t buf_size;

  #if defined(__zcc__)
      buf_size = tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #else
      buf_size = riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #endif

      return buf_size;
  }
  12256. /**
  12257. * @brief This function performs convolution for signed 8-bit integer
  12258. * inputs/outputs in any x and y dimensions with asymmetric
  12259. * quantization on the outputs.
  12260. * @param[in] in_tensor pointer of the input tensor
  12261. * @param[in] in_tensor_dim_x x dimension of the input tensor
  12262. * @param[in] in_tensor_dim_y y dimension of the input tensor
  12263. * @param[in] in_tensor_ch number of input tensor channels
  12264. * @param[in] in_tensor_group number of input tensor groups
  12265. * @param[in] ker_weight pointer of kernel weights
  12266. * @param[in] out_tensor_ch number of output tensor channels
  12267. * @param[in] ker_dim_x x dimension of the filter kernel
  12268. * @param[in] ker_dim_y y dimension of the filter kernel
  12269. * @param[in] pad_x padding size in the x dimension
  12270. * @param[in] pad_y padding size in the y dimension
  12271. * @param[in] stride_x convolution stride in the x dimension
  12272. * @param[in] stride_y convolution stride in the y dimension
  12273. * @param[in] bias pointer of the bias vector
  12274. * @param[out] out_tensor pointer of the output tensor
  12275. * @param[in] out_shift pointer of the shift vector for output
  12276. * tensor
  12277. * @param[in] out_scale pointer of the scaling vector for output
  12278. * tensor
  12279. * @param[in] out_offset value of offset for the output tensor.
  12280. * It should be in the range of -128 to 127.
  12281. * @param[in] in_offset value of offset for the input tensor
  12282. * It should be in the range of -127 to 128.
  12283. * @param[in] act_min minimum value to clip out the ouput
  12284. * tensor. It should be in the range of
  12285. * -128 to 127.
  12286. * @param[in] act_max maximum value to clip out the ouput
  12287. * tensor. It should be in the range of
  12288. * -128 to 127.
  12289. * @param[in] out_tensor_dim_x x dimension of the output tensor
  12290. * @param[in] out_tensor_dim_y y dimension of the output tensor
  12291. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  12292. * It is required when -mext-dsp or
  12293. * -mext-vector is enabled and its needed
  12294. * size could be get by calling riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size.
  12295. * @return This function only returns 0.
  12296. */
  12297. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
  12298. const uint16_t in_tensor_dim_x,
  12299. const uint16_t in_tensor_dim_y,
  12300. const uint16_t in_tensor_ch,
  12301. const uint16_t in_tensor_group,
  12302. const q7_t *ker_weight,
  12303. const uint16_t out_tensor_ch,
  12304. const uint16_t ker_dim_x,
  12305. const uint16_t ker_dim_y,
  12306. const uint16_t pad_x,
  12307. const uint16_t pad_y,
  12308. const uint16_t stride_x,
  12309. const uint16_t stride_y,
  12310. const int32_t *bias,
  12311. q7_t *out_tensor,
  12312. const int32_t *out_shift,
  12313. const int32_t *out_scale,
  12314. const int32_t out_offset,
  12315. const int32_t in_offset,
  12316. const int32_t act_min,
  12317. const int32_t act_max,
  12318. const uint16_t out_tensor_dim_x,
  12319. const uint16_t out_tensor_dim_y,
  12320. q15_t *in_tmp_buf)
  12321. {
  12322. #if defined(__zcc__)
  12323. tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
  12324. in_offset, out_offset, act_min, act_max};
  12325. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  12326. tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  12327. in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
  12328. out_tensor_ch};
  12329. return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
  12330. bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
  12331. #else
  12332. return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
  12333. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  12334. in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
  12335. pad_y, stride_x, stride_y, bias, out_tensor, out_shift, out_scale,
  12336. out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
  12337. out_tensor_dim_y, in_tmp_buf);
  12338. #endif
  12339. }
  /**
   * @brief Get the size, in bytes, needed by the input temporary buffer of
   *        hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any().
   *
   * Forwards to tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size() when
   * __zcc__ is defined, and to
   * riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size() otherwise.
   *
   * @param[in] in_tensor_ch  number of input tensor channels
   * @param[in] ker_dim_x     x dimension of the filter kernel
   * @param[in] ker_dim_y     y dimension of the filter kernel
   *
   * @return The size needed by the temporary buffer.
   */
  static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
      const uint16_t in_tensor_ch,
      const uint16_t ker_dim_x,
      const uint16_t ker_dim_y)
  {
      /* Size reported by whichever backend is selected at compile time. */
      int32_t buf_size;

  #if defined(__zcc__)
      buf_size = tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #else
      buf_size = riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #endif

      return buf_size;
  }
  /**
   * @brief Depthwise 3x3-kernel convolution for signed 8-bit integer
   *        inputs/outputs, in any x and y dimensions, with asymmetric
   *        quantization on the outputs.
   *
   * Thin wrapper: every argument is passed unchanged to
   * tpt_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any() when __zcc__ is defined,
   * and to riscv_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any() otherwise.
   *
   * @param[in]  in_tensor         pointer to the input tensor
   * @param[in]  in_tensor_dim_x   x dimension of the input tensor
   * @param[in]  in_tensor_dim_y   y dimension of the input tensor
   * @param[in]  in_tensor_ch      number of input tensor channels
   * @param[in]  ker_weight        pointer to the kernel weights
   * @param[in]  out_tensor_ch     number of output tensor channels
   * @param[in]  pad_x             padding size in the x dimension
   * @param[in]  pad_y             padding size in the y dimension
   * @param[in]  stride_x          convolution stride in the x dimension
   * @param[in]  stride_y          convolution stride in the y dimension
   * @param[in]  bias              pointer to the bias vector
   * @param[out] out_tensor        pointer to the output tensor
   * @param[in]  out_shift         pointer to the shift vector for the output
   *                               tensor
   * @param[in]  out_scale         pointer to the scaling vector for the output
   *                               tensor
   * @param[in]  out_tensor_dim_x  x dimension of the output tensor
   * @param[in]  out_tensor_dim_y  y dimension of the output tensor
   * @param[in]  out_offset        offset value for the output tensor. It should
   *                               be in the range of -128 to 127.
   * @param[in]  in_offset         offset value for the input tensor. It should
   *                               be in the range of -127 to 128.
   * @param[in]  act_min           minimum value used to clip the output tensor.
   *                               It should be in the range of -128 to 127.
   * @param[in]  act_max           maximum value used to clip the output tensor.
   *                               It should be in the range of -128 to 127.
   * @param[in]  dilation_x        dummy
   * @param[in]  dilation_y        dummy
   * @param[in]  tmp_buf           dummy
   *
   * @return 0 on success; -1 if the inputs do not meet the constraints that
   *         in_tensor_ch has to be equal to out_tensor_ch and pad_x is less
   *         than 1.
   */
  static inline int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
      const int8_t *in_tensor,
      const int32_t in_tensor_dim_x,
      const int32_t in_tensor_dim_y,
      const int32_t in_tensor_ch,
      const int8_t *ker_weight,
      const int32_t out_tensor_ch,
      const int32_t pad_x,
      const int32_t pad_y,
      const int32_t stride_x,
      const int32_t stride_y,
      const int32_t *bias,
      int8_t *out_tensor,
      const int32_t *out_shift,
      const int32_t *out_scale,
      const int32_t out_tensor_dim_x,
      const int32_t out_tensor_dim_y,
      const int32_t out_offset,
      const int32_t in_offset,
      const int32_t act_min,
      const int32_t act_max,
      const int32_t dilation_x,
      const int32_t dilation_y,
      int16_t *tmp_buf)
  {
      /* Result of whichever backend kernel is selected at compile time. */
      int32_t status;

  #if defined(__zcc__)
      status = tpt_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
          in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
          ker_weight, out_tensor_ch,
          pad_x, pad_y,
          stride_x, stride_y,
          bias, out_tensor,
          out_shift, out_scale,
          out_tensor_dim_x, out_tensor_dim_y,
          out_offset, in_offset,
          act_min, act_max,
          dilation_x, dilation_y,
          tmp_buf);
  #else
      status = riscv_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
          in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
          ker_weight, out_tensor_ch,
          pad_x, pad_y,
          stride_x, stride_y,
          bias, out_tensor,
          out_shift, out_scale,
          out_tensor_dim_x, out_tensor_dim_y,
          out_offset, in_offset,
          act_min, act_max,
          dilation_x, dilation_y,
          tmp_buf);
  #endif

      return status;
  }
  12437. /**
  12438. * @brief This function performs depthwise convolution for signed
  12439. * 8-bit interger inputs/outputs in any x and y dimensions with
  12440. * asymmetric quantization on the outputs.
  12441. * @param[in] in_tensor pointer of the input tensor
  12442. * @param[in] in_tensor_dim_x x dimension of the input tensor
  12443. * @param[in] in_tensor_dim_y y dimension of the input tensor
  12444. * @param[in] in_tensor_ch number of input tensor channels
  12445. * @param[in] ker_weight pointer of kernel weights
  12446. * @param[in] out_tensor_ch number of output tensor channels.
  12447. * out_tensor_ch is equal to ch_mult *
  12448. * in_tensor_ch.
  12449. * @param[in] ch_mult multiplier of input tensor channels
  12450. * @param[in] ker_dim_x x dimension of the filter kernel
  12451. * @param[in] ker_dim_y y dimension of the filter kernel
  12452. * @param[in] pad_x padding size in the x dimension
  12453. * @param[in] pad_y padding size in the y dimension
  12454. * @param[in] stride_x convolution stride in the x dimension
  12455. * @param[in] stride_y convolution stride in the y dimension
  12456. * @param[in] bias pointer of the bias vector
  12457. * @param[out] out_tensor pointer of the output tensor
  12458. * @param[in] out_shift pointer of the shift vector for output
  12459. * tensor
  12460. * @param[in] out_scale pointer of the scaling vector for output
  12461. * tensor
  12462. * @param[in] out_tensor_dim_x x dimension of the output tensor
  12463. * @param[in] out_tensor_dim_y y dimension of the output tensor
  12464. * @param[in] out_offset value of offset for the output tensor.
  12465. * It should be in the range of -128 to 127.
  12466. * @param[in] in_offset value of offset for the input tensor
  12467. * It should be in the range of -127 to 128.
  12468. * @param[in] act_min minimum value to clip out the ouput
  12469. * tensor. It should be in the range of
  12470. * -128 to 127.
  12471. * @param[in] act_max maximum value to clip out the ouput
  12472. * tensor. It should be in the range of
  12473. * -128 to 127.
  12474. * @param[in] dilation_x dummy
  12475. * @param[in] dilation_y dummy
  12476. * @param[in] tmp_buf dummy
  12477. * @return This function only returns 0.
  12478. *
  12479. * @b Example:
  12480. * @code
  12481. * to be modified...
  12482. * @endcode
  12483. */
  12484. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
  12485. const uint16_t in_tensor_dim_x,
  12486. const uint16_t in_tensor_dim_y,
  12487. const uint16_t in_tensor_ch,
  12488. const q7_t *ker_weight,
  12489. const uint16_t out_tensor_ch,
  12490. const uint16_t ch_mult,
  12491. const uint16_t ker_dim_x,
  12492. const uint16_t ker_dim_y,
  12493. const uint16_t pad_x,
  12494. const uint16_t pad_y,
  12495. const uint16_t stride_x,
  12496. const uint16_t stride_y,
  12497. const int32_t *bias,
  12498. q7_t *out_tensor,
  12499. const int32_t *out_shift,
  12500. const int32_t *out_scale,
  12501. const uint16_t out_tensor_dim_x,
  12502. const uint16_t out_tensor_dim_y,
  12503. const int32_t out_offset,
  12504. const int32_t in_offset,
  12505. const int32_t act_min,
  12506. const int32_t act_max,
  12507. const uint16_t dilation_x,
  12508. const uint16_t dilation_y,
  12509. q15_t *tmp_buf)
  12510. {
  12511. #if defined(__zcc__)
  12512. tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
  12513. stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
  12514. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  12515. tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  12516. ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
  12517. return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
  12518. bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
  12519. #else
  12520. return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
  12521. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  12522. out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
  12523. stride_y, bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
  12524. out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
  12525. dilation_y, tmp_buf);
  12526. #endif
  12527. }
  12528. /**
  12529. * @brief This function performs fast depthwise convolution for signed
  12530. * 8-bit integer inputs/outputs in any x and y dimensions with
  12531. * asymmetric quantization on the outputs.
  12532. * @param[in] in_tensor pointer of the input tensor
  12533. * @param[in] in_tensor_dim_x x dimension of the input tensor
  12534. * @param[in] in_tensor_dim_y y dimension of the input tensor
  12535. * @param[in] in_tensor_ch number of input tensor channels
  12536. * @param[in] ker_weight pointer of kernel weights
  12537. * @param[in] out_tensor_ch number of output tensor channels
  12538. * @param[in] ker_dim_x x dimension of the filter kernel
  12539. * @param[in] ker_dim_y y dimension of the filter kernel
  12540. * @param[in] pad_x padding size in the x dimension
  12541. * @param[in] pad_y padding size in the y dimension
  12542. * @param[in] stride_x convolution stride in the x dimension
  12543. * @param[in] stride_y convolution stride in the y dimension
  12544. * @param[in] bias pointer of the bias vector
  12545. * @param[out] out_tensor pointer of the output tensor
  12546. * @param[in] out_shift pointer of the shift vector for output
  12547. * tensor
  12548. * @param[in] out_scale pointer of the scaling vector for output
  12549. * tensor
  12550. * @param[in] out_tensor_dim_x x dimension of the output tensor
  12551. * @param[in] out_tensor_dim_y y dimension of the output tensor
  12552. * @param[in] out_offset value of offset for the output tensor.
  12553. * It should be in the range of -128 to 127.
  12554. * @param[in] in_offset value of offset for the input tensor
  12555. * It should be in the range of -127 to 128.
  12556. * @param[in] act_min minimum value to clip out the output
  12557. * tensor. It should be in the range of
  12558. * -128 to 127.
  12559. * @param[in] act_max maximum value to clip out the output
  12560. * tensor. It should be in the range of
  12561. * -128 to 127.
  12562. * @param[in] dilation_x dummy
  12563. * @param[in] dilation_y dummy
  12564. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  12565. * It is required when -mext-dsp or
  12566. * -mext-vector is enabled; the required
  12567. * size can be obtained by calling riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size.
  12568. * @return This function returns 0 on success; otherwise, it returns -1
  12569. * if its inputs do not meet the constraint that in_tensor_ch
  12570. * has to be equal to out_tensor_ch.
  12571. */
  12572. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
  12573. const uint16_t in_tensor_dim_x,
  12574. const uint16_t in_tensor_dim_y,
  12575. const uint16_t in_tensor_ch,
  12576. const q7_t *ker_weight,
  12577. const uint16_t out_tensor_ch,
  12578. const uint16_t ker_dim_x,
  12579. const uint16_t ker_dim_y,
  12580. const uint16_t pad_x,
  12581. const uint16_t pad_y,
  12582. const uint16_t stride_x,
  12583. const uint16_t stride_y,
  12584. const int32_t *bias,
  12585. q7_t *out_tensor,
  12586. const int32_t *out_shift,
  12587. const int32_t *out_scale,
  12588. const uint16_t out_tensor_dim_x,
  12589. const uint16_t out_tensor_dim_y,
  12590. const int32_t out_offset,
  12591. const int32_t in_offset,
  12592. const int32_t act_min,
  12593. const int32_t act_max,
  12594. const uint16_t dilation_x,
  12595. const uint16_t dilation_y,
  12596. q15_t *in_tmp_buf)
  12597. {
  12598. #if defined(__zcc__)
  12599. tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
  12600. stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
  12601. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  12602. tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  12603. ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
  12604. return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
  12605. bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
  12606. #else
  12607. return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
  12608. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  12609. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  12610. bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
  12611. out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
  12612. dilation_y, in_tmp_buf);
  12613. #endif
  12614. }
  /**
   * @brief Query the size, in bytes, needed by the input temporary buffer
   *        of riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any.
   *
   * The three arguments are forwarded unchanged to
   * tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size when
   * __zcc__ is defined, and to
   * riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size otherwise.
   *
   * @param[in] in_tensor_ch number of input tensor channels
   * @param[in] ker_dim_x    x dimension of the filter kernel
   * @param[in] ker_dim_y    y dimension of the filter kernel
   * @return The size needed by the temporary buffer.
   */
  static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
      const uint16_t in_tensor_ch,
      const uint16_t ker_dim_x,
      const uint16_t ker_dim_y)
  {
  #if !defined(__zcc__)
      return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
          in_tensor_ch,
          ker_dim_x,
          ker_dim_y);
  #else
      return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
          in_tensor_ch,
          ker_dim_x,
          ker_dim_y);
  #endif
  }
  /**
   * @brief Depthwise convolution for unsigned 8-bit integer inputs/outputs
   *        in any x and y dimensions, with asymmetric quantization on the
   *        outputs.
   *
   * All arguments are forwarded unchanged, in declaration order, to
   * tpt_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any when __zcc__ is defined and
   * to riscv_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any otherwise.
   *
   * @param[in]  in_tensor        pointer of the input tensor
   * @param[in]  in_tensor_dim_x  x dimension of the input tensor
   * @param[in]  in_tensor_dim_y  y dimension of the input tensor
   * @param[in]  in_tensor_ch     number of input tensor channels
   * @param[in]  ker_weight       pointer of kernel weights
   * @param[in]  ker_dim_x        x dimension of the filter kernel
   * @param[in]  ker_dim_y        y dimension of the filter kernel
   * @param[in]  ch_mult          multiplier of input tensor channels
   * @param[in]  pad_x            padding size in the x dimension
   * @param[in]  pad_y            padding size in the y dimension
   * @param[in]  stride_x         convolution stride in the x dimension
   * @param[in]  stride_y         convolution stride in the y dimension
   * @param[in]  dilation_x       dummy
   * @param[in]  dilation_y       dummy
   * @param[in]  bias             pointer of the bias vector
   * @param[in]  in_offset        value of offset for the input tensor.
   *                              It should be in the range of -255 to 0.
   * @param[in]  ker_offset       value of offset for the filter kernel.
   *                              It should be in the range of -255 to 0.
   * @param[in]  out_offset       value of offset for the output tensor.
   *                              It should be in the range of 0 to 255.
   * @param[out] out_tensor       pointer of the output tensor
   * @param[in]  out_tensor_dim_x x dimension of the output tensor
   * @param[in]  out_tensor_dim_y y dimension of the output tensor
   * @param[in]  act_min          minimum value to clip the output tensor.
   *                              It should be in the range of 0 to 255.
   * @param[in]  act_max          maximum value to clip the output tensor.
   *                              It should be in the range of 0 to 255.
   * @param[in]  out_shift        shift amount for the output tensor
   * @param[in]  out_scale        value of scaling for the output tensor
   * @return The backend's return value. As documented for the riscv_nn
   *         backend: 0 on success; otherwise -1 if the inputs do not meet
   *         the constraint that both ch_mult and ker_dim_x are multiples
   *         of 2.
   */
  static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
      const uint8_t *in_tensor,
      const uint16_t in_tensor_dim_x,
      const uint16_t in_tensor_dim_y,
      const uint16_t in_tensor_ch,
      const uint8_t *ker_weight,
      const uint16_t ker_dim_x,
      const uint16_t ker_dim_y,
      const int16_t ch_mult,
      const int16_t pad_x,
      const int16_t pad_y,
      const int16_t stride_x,
      const int16_t stride_y,
      const int16_t dilation_x,
      const int16_t dilation_y,
      const int32_t *bias,
      const int32_t in_offset,
      const int32_t ker_offset,
      const int32_t out_offset,
      uint8_t *out_tensor,
      const uint16_t out_tensor_dim_x,
      const uint16_t out_tensor_dim_y,
      const int32_t act_min,
      const int32_t act_max,
      const int32_t out_shift,
      const int32_t out_scale)
  {
  #if !defined(__zcc__)
      return riscv_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
          in_tensor,
          in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
          ker_weight,
          ker_dim_x, ker_dim_y,
          ch_mult,
          pad_x, pad_y,
          stride_x, stride_y,
          dilation_x, dilation_y,
          bias,
          in_offset, ker_offset, out_offset,
          out_tensor,
          out_tensor_dim_x, out_tensor_dim_y,
          act_min, act_max,
          out_shift, out_scale);
  #else
      return tpt_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
          in_tensor,
          in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
          ker_weight,
          ker_dim_x, ker_dim_y,
          ch_mult,
          pad_x, pad_y,
          stride_x, stride_y,
          dilation_x, dilation_y,
          bias,
          in_offset, ker_offset, out_offset,
          out_tensor,
          out_tensor_dim_x, out_tensor_dim_y,
          act_min, act_max,
          out_shift, out_scale);
  #endif
  }
  12717. #ifdef __riscv_zfh
  12718. /**
  12719. * @brief This function performs 1x1 kernels convolution for 16-bit
  12720. * half-precision floating point inputs/outputs in any x and y
  12721. * dimensions.
  12722. * @param[in] in_tensor pointer of the input tensor
  12723. * @param[in] in_tensor_dim_x x dimension of the input tensor
  12724. * @param[in] in_tensor_dim_y y dimension of the input tensor
  12725. * @param[in] in_tensor_ch number of input tensor channels
  12726. * @param[in] ker_weight pointer of kernel weights
  12727. * @param[in] out_tensor_ch number of output tensor channels
  12728. * @param[in] ker_dim_x x dimension of the filter kernel
  12729. * @param[in] ker_dim_y y dimension of the filter kernel
  12730. * @param[in] pad_x padding size in the x dimension
  12731. * @param[in] pad_y padding size in the y dimension
  12732. * @param[in] stride_x convolution stride in the x dimension
  12733. * @param[in] stride_y convolution stride in the y dimension
  12734. * @param[in] bias pointer of the bias vector
  12735. * @param[out] out_tensor pointer of the output tensor
  12736. * @param[in] out_tensor_dim_x x dimension of the output tensor
  12737. * @param[in] out_tensor_dim_y y dimension of the output tensor
  12738. * @param[in] in_tmp_buf dummy
  12739. * @param[in] tmp_buf dummy
  12740. * @return This function only returns 0.
  12741. *
  12742. * @note
  12743. * - The input constraints of this function are:
  12744. * - in_tensor_ch is a multiple of 4
  12745. * - out_tensor_ch is a multiple of 2
  12746. * - ker_dim_x is 1
  12747. * - ker_dim_y is 1
  12748. * - pad_x is 0
  12749. * - pad_y is 0
  12750. * - stride_x is 1
  12751. * - stride_y is 1
  12752. */
  12753. static inline int32_t hpm_nn_conv_1x1_HWC_f16_f16_f16_bias_any(const float16_t *in_tensor,
  12754. const uint16_t in_tensor_dim_x,
  12755. const uint16_t in_tensor_dim_y,
  12756. const uint16_t in_tensor_ch,
  12757. const float16_t *ker_weight,
  12758. const uint16_t out_tensor_ch,
  12759. const uint16_t ker_dim_x,
  12760. const uint16_t ker_dim_y,
  12761. const uint16_t pad_x,
  12762. const uint16_t pad_y,
  12763. const uint16_t stride_x,
  12764. const uint16_t stride_y,
  12765. const float16_t *bias,
  12766. float16_t *out_tensor,
  12767. const uint16_t out_tensor_dim_x,
  12768. const uint16_t out_tensor_dim_y,
  12769. float16_t *in_tmp_buf,
  12770. float16_t *tmp_buf)
  12771. {
  12772. #if defined(__zcc__)
  12773. return tpt_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
  12774. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  12775. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  12776. bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y, in_tmp_buf,
  12777. tmp_buf);
  12778. #else
  12779. return riscv_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
  12780. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  12781. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  12782. bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y, in_tmp_buf,
  12783. tmp_buf);
  12784. #endif
  12785. }
  12786. /**
  12787. * @brief This function performs convolution for 16-bit half-precision
  12788. * floating point inputs/outputs.
  12789. * @param[in] in_tensor pointer of the input tensor
  12790. * @param[in] in_tensor_dim dimension of the input tensor
  12791. * @param[in] in_tensor_ch number of input tensor channels
  12792. * @param[in] ker_weight pointer of kernel weights
  12793. * @param[in] out_tensor_ch number of output tensor channels
  12794. * @param[in] ker_dim dimension of the filter kernel
  12795. * @param[in] pad padding size
  12796. * @param[in] stride convolution stride
  12797. * @param[in] bias pointer of the bias vector
  12798. * @param[out] out_tensor pointer of the output tensor
  12799. * @param[in] out_tensor_dim dimension of the output tensor
  12800. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  12801. * It is required when -mext-vector is
  12802. * enabled and its size must be equal to
  12803. * "2 * in_tensor_ch * ker_dim * ker_dim".
  12804. * @param[in] tmp_buf dummy
  12805. * @return This function returns 0.
  12806. */
  12807. static inline int32_t hpm_nn_conv_HWC_f16_f16_f16_bias(const float16_t *in_tensor,
  12808. const uint16_t in_tensor_dim,
  12809. const uint16_t in_tensor_ch,
  12810. const float16_t *ker_weight,
  12811. const uint16_t out_tensor_ch,
  12812. const uint16_t ker_dim,
  12813. const uint16_t pad,
  12814. const uint16_t stride,
  12815. const float16_t *bias,
  12816. float16_t *out_tensor,
  12817. const uint16_t out_tensor_dim,
  12818. float16_t *in_tmp_buf,
  12819. float16_t *tmp_buf)
  12820. {
  12821. #if defined(__zcc__)
  12822. return tpt_nn_conv_HWC_f16_f16_f16_bias(
  12823. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  12824. ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
  12825. tmp_buf);
  12826. #else
  12827. return riscv_nn_conv_HWC_f16_f16_f16_bias(
  12828. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  12829. ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
  12830. tmp_buf);
  12831. #endif
  12832. }
  12833. /**
  12834. * @brief This function performs depthwise convolution for 16-bit
  12835. * half-precision floating point inputs/outputs
  12836. * @param[in] in_tensor pointer of the input tensor
  12837. * @param[in] in_tensor_dim dimension of the input tensor
  12838. * @param[in] in_tensor_ch number of input tensor channels
  12839. * @param[in] ker_weight pointer of kernel weights
  12840. * @param[in] out_tensor_ch number of output tensor channels
  12841. * @param[in] ker_dim dimension of the filter kernel
  12842. * @param[in] pad padding size
  12843. * @param[in] stride convolution stride
  12844. * @param[in] bias pointer of the bias vector
  12845. * @param[out] out_tensor pointer of the output tensor
  12846. * @param[in] out_tensor_dim dimension of the output tensor
  12847. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  12848. * It is required when -mext-vector is
  12849. * enabled and its size must be equal to
  12850. * "in_tensor_ch * ker_dim * ker_dim".
  12851. * @param[in] tmp_buf dummy
  12852. * @return This function returns 0.
  12853. */
  12854. static inline int32_t hpm_nn_conv_dw_HWC_f16_f16_f16_bias(const float16_t *in_tensor,
  12855. const uint16_t in_tensor_dim,
  12856. const uint16_t in_tensor_ch,
  12857. const float16_t *ker_weight,
  12858. const uint16_t out_tensor_ch,
  12859. const uint16_t ker_dim,
  12860. const uint16_t pad,
  12861. const uint16_t stride,
  12862. const float16_t *bias,
  12863. float16_t *out_tensor,
  12864. const uint16_t out_tensor_dim,
  12865. float16_t *in_tmp_buf,
  12866. float16_t *tmp_buf)
  12867. {
  12868. #if defined(__zcc__)
  12869. return tpt_nn_conv_dw_HWC_f16_f16_f16_bias(
  12870. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  12871. ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
  12872. tmp_buf);
  12873. #else
  12874. return riscv_nn_conv_dw_HWC_f16_f16_f16_bias(
  12875. in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
  12876. ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
  12877. tmp_buf);
  12878. #endif
  12879. }
  12880. #endif
  12881. /**
  12882. * * @}
  12883. */
  12884. #endif
  12885. #ifdef HPM_EN_MATH_NN_RVP32_LIB
  12886. #if defined(__zcc__)
  12887. #include "tpt_nn_convolution.h"
  12888. #else
  12889. #include "riscv_nn_convolution.h"
  12890. #endif
  12891. /**
  12892. * @brief This function performs convolution for signed 8-bit integer
  12893. * inputs/outputs in any x and y dimensions with asymmetric
  12894. * quantization on the outputs.
  12895. * @param[in] in_tensor pointer of the input tensor
  12896. * @param[in] in_tensor_dim_x x dimension of the input tensor
  12897. * @param[in] in_tensor_dim_y y dimension of the input tensor
  12898. * @param[in] in_tensor_ch number of input tensor channels
  12899. * @param[in] in_tensor_group number of input tensor groups
  12900. * @param[in] ker_weight pointer of kernel weights
  12901. * @param[in] out_tensor_ch number of output tensor channels
  12902. * @param[in] ker_dim_x x dimension of the filter kernel
  12903. * @param[in] ker_dim_y y dimension of the filter kernel
  12904. * @param[in] pad_x padding size in the x dimension
  12905. * @param[in] pad_y padding size in the y dimension
  12906. * @param[in] stride_x convolution stride in the x dimension
  12907. * @param[in] stride_y convolution stride in the y dimension
  12908. * @param[in] bias pointer of the bias vector
  12909. * @param[out] out_tensor pointer of the output tensor
  12910. * @param[in] out_shift pointer of the shift vector for output
  12911. * tensor
  12912. * @param[in] out_scale pointer of the scaling vector for output
  12913. * tensor
  12914. * @param[in] out_offset value of offset for the output tensor.
  12915. * It should be in the range of -128 to 127.
  12916. * @param[in] in_offset value of offset for the input tensor
  12917. * It should be in the range of -127 to 128.
  12918. * @param[in] act_min minimum value to clip out the ouput
  12919. * tensor. It should be in the range of
  12920. * -128 to 127.
  12921. * @param[in] act_max maximum value to clip out the ouput
  12922. * tensor. It should be in the range of
  12923. * -128 to 127.
  12924. * @param[in] out_tensor_dim_x x dimension of the output tensor
  12925. * @param[in] out_tensor_dim_y y dimension of the output tensor
  12926. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  12927. * It is required when -mext-dsp or
  12928. * -mext-vector is enabled and its needed
  12929. * size could be get by calling riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size.
  12930. * @return This function only returns 0.
  12931. */
  12932. static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
  12933. const uint16_t in_tensor_dim_x,
  12934. const uint16_t in_tensor_dim_y,
  12935. const uint16_t in_tensor_ch,
  12936. const uint16_t in_tensor_group,
  12937. const q7_t *ker_weight,
  12938. const uint16_t out_tensor_ch,
  12939. const uint16_t ker_dim_x,
  12940. const uint16_t ker_dim_y,
  12941. const uint16_t pad_x,
  12942. const uint16_t pad_y,
  12943. const uint16_t stride_x,
  12944. const uint16_t stride_y,
  12945. const int32_t *bias,
  12946. q7_t *out_tensor,
  12947. const int32_t *out_shift,
  12948. const int32_t *out_scale,
  12949. const int32_t out_offset,
  12950. const int32_t in_offset,
  12951. const int32_t act_min,
  12952. const int32_t act_max,
  12953. const uint16_t out_tensor_dim_x,
  12954. const uint16_t out_tensor_dim_y,
  12955. q15_t *in_tmp_buf)
  12956. {
  12957. #if defined(__zcc__)
  12958. tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
  12959. in_offset, out_offset, act_min, act_max};
  12960. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  12961. tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  12962. in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
  12963. out_tensor_ch};
  12964. return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
  12965. bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
  12966. #else
  12967. return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
  12968. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  12969. in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
  12970. pad_y, stride_x, stride_y, bias, out_tensor, out_shift, out_scale,
  12971. out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
  12972. out_tensor_dim_y, in_tmp_buf);
  12973. #endif
  12974. }
  12975. /**
  12976. * @brief This function performs 1x1 kernels convolution for signed
  12977. * 8-bit interger inputs/outputs in any x and y dimensions with
  12978. * asymmetric quantization on the outputs.
  12979. * @param[in] in_tensor pointer of the input tensor
  12980. * @param[in] in_tensor_dim_x x dimension of the input tensor
  12981. * @param[in] in_tensor_dim_y y dimension of the input tensor
  12982. * @param[in] in_tensor_ch number of input tensor channels
  12983. * @param[in] in_tensor_group number of input tensor groups
  12984. * @param[in] ker_weight pointer of kernel weights
  12985. * @param[in] out_tensor_ch number of output tensor channels
  12986. * @param[in] pad_x padding size in the x dimension
  12987. * @param[in] pad_y padding size in the y dimension
  12988. * @param[in] stride_x convolution stride in the x dimension
  12989. * @param[in] stride_y convolution stride in the y dimension
  12990. * @param[in] bias pointer of the bias vector
  12991. * @param[out] out_tensor pointer of the output tensor
  12992. * @param[in] out_shift pointer of the shift vector for output
  12993. * tensor
  12994. * @param[in] out_scale pointer of the scaling vector for output
  12995. * tensor
  12996. * @param[in] out_offset value of offset for the output tensor.
  12997. * It should be in the range of -128 to 127.
  12998. * @param[in] in_offset value of offset for the input tensor
  12999. * It should be in the range of -127 to 128.
  13000. * @param[in] act_min minimum value to clip out the ouput
  13001. * tensor. It should be in the range of
  13002. * -128 to 127.
  13003. * @param[in] act_max maximum value to clip out the ouput
  13004. * tensor. It should be in the range of
  13005. * -128 to 127.
  13006. * @param[in] out_tensor_dim_x x dimension of the output tensor
  13007. * @param[in] out_tensor_dim_y y dimension of the output tensor
  13008. * @param[in] tmp_buf dummy
  13009. * @return This function returns 0 on success; otherwise, it returns -1
  13010. * if its inputs do not meet the constraints (see the Note
  13011. * below for details).
  13012. *
  13013. * @note
  13014. * - The input constraints of this function are:
  13015. * - in_tensor_ch is a multiple of 4
  13016. * - pad_x is 0
  13017. * - pad_y is 0
  13018. * - stride_x is 1
  13019. * - stride_y is 1
  13020. */
  13021. static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
  13022. const uint16_t in_tensor_dim_x,
  13023. const uint16_t in_tensor_dim_y,
  13024. const uint16_t in_tensor_ch,
  13025. const uint16_t in_tensor_group,
  13026. const q7_t *ker_weight,
  13027. const uint16_t out_tensor_ch,
  13028. const uint16_t pad_x,
  13029. const uint16_t pad_y,
  13030. const uint16_t stride_x,
  13031. const uint16_t stride_y,
  13032. const int32_t *bias,
  13033. q7_t *out_tensor,
  13034. const int32_t *out_shift,
  13035. const int32_t *out_scale,
  13036. const int32_t out_offset,
  13037. const int32_t in_offset,
  13038. const int32_t act_min,
  13039. const int32_t act_max,
  13040. const uint16_t out_tensor_dim_x,
  13041. const uint16_t out_tensor_dim_y,
  13042. q15_t *tmp_buf)
  13043. {
  13044. #if defined(__zcc__)
  13045. tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
  13046. stride_y, pad_x, pad_y, act_min, act_max};
  13047. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  13048. tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  13049. in_tensor_group, out_tensor_ch};
  13050. return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
  13051. bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
  13052. #else
  13053. return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
  13054. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  13055. in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
  13056. stride_y, bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
  13057. act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
  13058. #endif
  13059. }
  13060. /**
  13061. * @brief This function performs depthwise convolution for signed
  13062. * 8-bit interger inputs/outputs in any x and y dimensions with
  13063. * asymmetric quantization on the outputs.
  13064. * @param[in] in_tensor pointer of the input tensor
  13065. * @param[in] in_tensor_dim_x x dimension of the input tensor
  13066. * @param[in] in_tensor_dim_y y dimension of the input tensor
  13067. * @param[in] in_tensor_ch number of input tensor channels
  13068. * @param[in] ker_weight pointer of kernel weights
  13069. * @param[in] out_tensor_ch number of output tensor channels.
  13070. * out_tensor_ch is equal to ch_mult *
  13071. * in_tensor_ch.
  13072. * @param[in] ch_mult multiplier of input tensor channels
  13073. * @param[in] ker_dim_x x dimension of the filter kernel
  13074. * @param[in] ker_dim_y y dimension of the filter kernel
  13075. * @param[in] pad_x padding size in the x dimension
  13076. * @param[in] pad_y padding size in the y dimension
  13077. * @param[in] stride_x convolution stride in the x dimension
  13078. * @param[in] stride_y convolution stride in the y dimension
  13079. * @param[in] bias pointer of the bias vector
  13080. * @param[out] out_tensor pointer of the output tensor
  13081. * @param[in] out_shift pointer of the shift vector for output
  13082. * tensor
  13083. * @param[in] out_scale pointer of the scaling vector for output
  13084. * tensor
  13085. * @param[in] out_tensor_dim_x x dimension of the output tensor
  13086. * @param[in] out_tensor_dim_y y dimension of the output tensor
  13087. * @param[in] out_offset value of offset for the output tensor.
  13088. * It should be in the range of -128 to 127.
  13089. * @param[in] in_offset value of offset for the input tensor
  13090. * It should be in the range of -127 to 128.
  13091. * @param[in] act_min minimum value to clip out the ouput
  13092. * tensor. It should be in the range of
  13093. * -128 to 127.
  13094. * @param[in] act_max maximum value to clip out the ouput
  13095. * tensor. It should be in the range of
  13096. * -128 to 127.
  13097. * @param[in] dilation_x dummy
  13098. * @param[in] dilation_y dummy
  13099. * @param[in] tmp_buf dummy
  13100. * @return This function only returns 0.
  13101. *
  13102. * @b Example:
  13103. * @code
  13104. * to be modified...
  13105. * @endcode
  13106. */
  13107. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
  13108. const uint16_t in_tensor_dim_x,
  13109. const uint16_t in_tensor_dim_y,
  13110. const uint16_t in_tensor_ch,
  13111. const q7_t *ker_weight,
  13112. const uint16_t out_tensor_ch,
  13113. const uint16_t ch_mult,
  13114. const uint16_t ker_dim_x,
  13115. const uint16_t ker_dim_y,
  13116. const uint16_t pad_x,
  13117. const uint16_t pad_y,
  13118. const uint16_t stride_x,
  13119. const uint16_t stride_y,
  13120. const int32_t *bias,
  13121. q7_t *out_tensor,
  13122. const int32_t *out_shift,
  13123. const int32_t *out_scale,
  13124. const uint16_t out_tensor_dim_x,
  13125. const uint16_t out_tensor_dim_y,
  13126. const int32_t out_offset,
  13127. const int32_t in_offset,
  13128. const int32_t act_min,
  13129. const int32_t act_max,
  13130. const uint16_t dilation_x,
  13131. const uint16_t dilation_y,
  13132. q15_t *tmp_buf)
  13133. {
  13134. #if defined(__zcc__)
  13135. tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
  13136. stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
  13137. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  13138. tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  13139. ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
  13140. return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
  13141. bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
  13142. #else
  13143. return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
  13144. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  13145. out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
  13146. stride_y, bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
  13147. out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
  13148. dilation_y, tmp_buf);
  13149. #endif
  13150. }
  13151. /**
  13152. * @brief This function performs 1xn kernels convolution for signed
  13153. * 8-bit integer inputs/outputs in any x and y dimensions with
  13154. * asymmetric quantization on the outputs.
  13155. * @param[in] in_tensor pointer of the input tensor
  13156. * @param[in] in_tensor_dim_x x dimension of the input tensor
  13157. * @param[in] in_tensor_ch number of input tensor channels
  13158. * @param[in] in_tensor_group dummy
  13159. * @param[in] ker_weight pointer of kernel weights
  13160. * @param[in] out_tensor_ch number of output tensor channels
  13161. * @param[in] ker_dim_x x dimension of the filter kernel
  13162. * @param[in] pad_x padding size in the x dimension
  13163. * @param[in] stride_x convolution stride in the x dimension
  13164. * @param[in] bias pointer of the bias vector
  13165. * @param[out] out_tensor pointer of the output tensor
  13166. * @param[in] out_tensor_ch number of output tensor channels
  13167. * @param[in] out_shift pointer of the shift vector for output
  13168. * tensor
  13169. * @param[in] out_scale pointer of the scaling vector for output
  13170. * tensor
  13171. * @param[in] out_offset value of offset for the output tensor.
  13172. * It should be in the range of -128 to 127.
  13173. * @param[in] in_offset value of offset for the input tensor
  13174. * It should be in the range of -127 to 128.
  13175. * @param[in] act_min minimum value to clip out the ouput
  13176. * tensor. It should be in the range of
  13177. * -128 to 127.
  13178. * @param[in] act_max maximum value to clip out the ouput
  13179. * tensor. It should be in the range of
  13180. * -128 to 127.
  13181. * @param[in] out_tensor_dim_x x dimension of the output tensor
  13182. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  13183. * It is required when -mext-dsp or
  13184. * -mext-vector is enabled and its needed
  13185. * size could be get by calling riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size.
  13186. * @return This function returns 0 on success; otherwise, it returns -1
  13187. * if its inputs do not meet the constraint that
  13188. * out_tensor_dim_x is a multiple of 4.
  13189. */
  13190. static inline int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
  13191. const uint16_t in_tensor_dim_x,
  13192. const uint16_t in_tensor_ch,
  13193. const uint16_t in_tensor_group,
  13194. const q7_t *ker_weight,
  13195. const uint16_t out_tensor_ch,
  13196. const uint16_t ker_dim_x,
  13197. const uint16_t pad_x,
  13198. const uint16_t stride_x,
  13199. const int32_t *bias,
  13200. q7_t *out_tensor,
  13201. const int32_t *out_shift,
  13202. const int32_t *out_scale,
  13203. const int32_t out_offset,
  13204. const int32_t in_offset,
  13205. const int32_t act_min,
  13206. const int32_t act_max,
  13207. const uint16_t out_tensor_dim_x,
  13208. q15_t *in_tmp_buf)
  13209. {
  13210. #if defined(__zcc__)
  13211. tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
  13212. act_min, act_max};
  13213. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  13214. tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
  13215. ker_dim_x, out_tensor_dim_x, out_tensor_ch};
  13216. return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
  13217. bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
  13218. #else
  13219. return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
  13220. in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
  13221. out_tensor_ch, ker_dim_x, pad_x, stride_x, bias, out_tensor, out_shift,
  13222. out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
  13223. in_tmp_buf);
  13224. #endif
  13225. }
  13226. /**
  13227. * @brief This function performs fast depthwise convolution for signed
  13228. * 8-bit integer inputs/outputs in any x and y dimensions with
  13229. * asymmetric quantization on the outputs.
  13230. * @param[in] in_tensor pointer of the input tensor
  13231. * @param[in] in_tensor_dim_x x dimension of the input tensor
  13232. * @param[in] in_tensor_dim_y y dimension of the input tensor
  13233. * @param[in] in_tensor_ch number of input tensor channels
  13234. * @param[in] ker_weight pointer of kernel weights
  13235. * @param[in] out_tensor_ch number of output tensor channels
  13236. * @param[in] ker_dim_x x dimension of the filter kernel
  13237. * @param[in] ker_dim_y y dimension of the filter kernel
  13238. * @param[in] pad_x padding size in the x dimension
  13239. * @param[in] pad_y padding size in the y dimension
  13240. * @param[in] stride_x convolution stride in the x dimension
  13241. * @param[in] stride_y convolution stride in the y dimension
  13242. * @param[in] bias pointer of the bias vector
  13243. * @param[out] out_tensor pointer of the output tensor
  13244. * @param[in] out_shift pointer of the shift vector for output
  13245. * tensor
  13246. * @param[in] out_scale pointer of the scaling vector for output
  13247. * tensor
  13248. * @param[in] out_tensor_dim_x x dimension of the output tensor
  13249. * @param[in] out_tensor_dim_y y dimension of the output tensor
  13250. * @param[in] out_offset value of offset for the output tensor.
  13251. * It should be in the range of -128 to 127.
  13252. * @param[in] in_offset value of offset for the input tensor
  13253. * It should be in the range of -127 to 128.
  13254. * @param[in] act_min minimum value to clip out the ouput
  13255. * tensor. It should be in the range of
  13256. * -128 to 127.
  13257. * @param[in] act_max maximum value to clip out the ouput
  13258. * tensor. It should be in the range of
  13259. * -128 to 127.
  13260. * @param[in] dilation_x dummy
  13261. * @param[in] dilation_y dummy
  13262. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  13263. * It is required when -mext-dsp or
  13264. * -mext-vector is enabled and its needed
  13265. * size could be get by calling riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size.
  13266. * @return This function returns 0 on success; otherwise, it returns -1
  13267. * if its inputs do not meet the constraint that in_tensor_ch
  13268. * has to be equal to out_tensor_ch.
  13269. */
  13270. static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
  13271. const uint16_t in_tensor_dim_x,
  13272. const uint16_t in_tensor_dim_y,
  13273. const uint16_t in_tensor_ch,
  13274. const q7_t *ker_weight,
  13275. const uint16_t out_tensor_ch,
  13276. const uint16_t ker_dim_x,
  13277. const uint16_t ker_dim_y,
  13278. const uint16_t pad_x,
  13279. const uint16_t pad_y,
  13280. const uint16_t stride_x,
  13281. const uint16_t stride_y,
  13282. const int32_t *bias,
  13283. q7_t *out_tensor,
  13284. const int32_t *out_shift,
  13285. const int32_t *out_scale,
  13286. const uint16_t out_tensor_dim_x,
  13287. const uint16_t out_tensor_dim_y,
  13288. const int32_t out_offset,
  13289. const int32_t in_offset,
  13290. const int32_t act_min,
  13291. const int32_t act_max,
  13292. const uint16_t dilation_x,
  13293. const uint16_t dilation_y,
  13294. q15_t *in_tmp_buf)
  13295. {
  13296. #if defined(__zcc__)
  13297. tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
  13298. stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
  13299. tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
  13300. tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  13301. ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
  13302. return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
  13303. bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
  13304. #else
  13305. return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
  13306. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
  13307. out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
  13308. bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
  13309. out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
  13310. dilation_y, in_tmp_buf);
  13311. #endif
  13312. }
  /**
   * @brief           Query the size, in bytes, needed by the input temporary
   *                  buffer of riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any.
   * @param[in]       in_tensor_ch    number of input tensor channels
   * @return          The size needed by the temporary buffer.
   */
  static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
      const uint16_t in_tensor_ch)
  {
      int32_t buf_size;

  #ifdef __zcc__
      buf_size = tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(in_tensor_ch);
  #else
      buf_size = riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(in_tensor_ch);
  #endif

      return buf_size;
  }
  /**
   * @brief           Query the size, in bytes, needed by the input temporary
   *                  buffer of riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any.
   * @param[in]       in_tensor_ch    number of input tensor channels
   * @param[in]       ker_dim_x       x dimension of the filter kernel
   * @param[in]       ker_dim_y       y dimension of the filter kernel
   * @return          The size needed by the temporary buffer.
   */
  static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
      const uint16_t in_tensor_ch,
      const uint16_t ker_dim_x,
      const uint16_t ker_dim_y)
  {
      int32_t buf_size;

  #ifdef __zcc__
      buf_size = tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #else
      buf_size = riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #endif

      return buf_size;
  }
  /**
   * @brief           Query the size, in bytes, needed by the input temporary
   *                  buffer of riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any.
   * @param[in]       in_tensor_ch    number of input tensor channels
   * @param[in]       ker_dim_x       x dimension of the filter kernel
   * @param[in]       ker_dim_y       y dimension of the filter kernel. It is
   *                                  always 1 here.
   * @return          The size needed by the temporary buffer.
   */
  static inline int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
      const uint16_t in_tensor_ch,
      const uint16_t ker_dim_x,
      const uint16_t ker_dim_y)
  {
  #if !defined(__zcc__)
      return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #else
      return tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #endif
  }
  /**
   * @brief           Query the size, in bytes, needed by the input temporary
   *                  buffer of riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any.
   * @param[in]       in_tensor_ch    number of input tensor channels
   * @param[in]       ker_dim_x       x dimension of the filter kernel
   * @param[in]       ker_dim_y       y dimension of the filter kernel
   * @return          The size needed by the temporary buffer.
   */
  static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
      const uint16_t in_tensor_ch,
      const uint16_t ker_dim_x,
      const uint16_t ker_dim_y)
  {
  #if !defined(__zcc__)
      return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #else
      return tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
          in_tensor_ch, ker_dim_x, ker_dim_y);
  #endif
  }
  13390. #endif
  13391. #endif
  13392. #ifdef HPM_MATH_NN_CONNECTED
  13393. #ifdef HPM_EN_MATH_NN_LIB
  13394. #if defined(__zcc__)
  13395. #include "tpt_nn_fully_connected.h"
  13396. #else
  13397. #include "riscv_nn_fully_connected.h"
  13398. #endif
  13399. /**
  13400. * @defgroup nnfullyconnect NN Fully Connected Functions
  13401. * @ingroup hpmmath
  13402. * @brief The fully connected functions multiply the input vector by a weight
  13403. * matrix and add a bias, if any, to the result. The supported combinations of
  13404. * input vector and weight matrix are (signed 8-bit integer, signed 8-bit integer),
  13405. * (unsigned 8-bit integer, signed 8-bit integer), (signed 16-bit integer,
  13406. * signed 8-bit integer), (signed 16-bit integer, signed 16-bit integer) and
  13407. * (16-bit half-precision floating point, 16-bit half-precision floating point).
  13408. *
  13409. * @{
  13410. */
  13411. /**
  13412. * @brief This is a fully connected layer function for signed 8-bit
  13413. * integer inputs with shift-based quantization on the outputs.
  13414. * @param[in] in_vec pointer of the input vector
  13415. * @param[in] wt_mat pointer of the weight matrix
  13416. * @param[in] size number of elements in the input vector
  13417. * @param[in] wt_row_num number of rows in the weight matrix
  13418. * @param[in] bias_lshift left shift amount for the bias
  13419. * @param[in] out_rshift right shift amount for the output
  13420. * @param[in] bias pointer of the bias vector
  13421. * @param[out] out_vec pointer of the output vector
  13422. * @param[in] in_tmp_buf dummy
  13423. * @return This function only returns 0.
  13424. *
  13425. * @b Example:
  13426. * @code
  13427. * #define IN_SIZE 2048
  13428. * #define OUT_SIZE 256
  13429. * #define BIAS_LSHIFT 9
  13430. * #define OUT_RSHIFT 9
  13431. *
  13432. * q7_t in_vec[IN_SIZE] = {...};;
  13433. * q7_t wt_mat[IN_SIZE * OUT_SIZE] {...};
  13434. * q7_t bias[OUT_SIZE] = {...};
  13435. * q7_t out_vec[OUT_SIZE];
  13436. *
  13437. * hpm_nn_fc_s8_s8_s8_sft_bias(in_vec, wt_mat, IN_SIZE, OUT_SIZE, BIAS_LSHIFT,
  13438. * OUT_RSHIFT, bias, out_vec, NULL);
  13439. * @endcode
  13440. */
  13441. static inline int32_t hpm_nn_fc_s8_s8_s8_sft_bias(const q7_t *in_vec,
  13442. const q7_t *wt_mat,
  13443. const uint16_t size,
  13444. const uint16_t wt_row_num,
  13445. const uint16_t bias_lshift,
  13446. const uint16_t out_rshift,
  13447. const q7_t *bias,
  13448. q7_t *out_vec,
  13449. q15_t *in_tmp_buf)
  13450. #if defined(__zcc__)
  13451. return tpt_nn_fc_s8_s8_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
  13452. bias_lshift, out_rshift, bias, out_vec,
  13453. in_tmp_buf);
  13454. #else
  13455. return riscv_nn_fc_s8_s8_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
  13456. bias_lshift, out_rshift, bias, out_vec,
  13457. in_tmp_buf);
  13458. #endif
  13459. }
  13460. /**
  13461. * @brief This is a fully connected layer function for signed 8-bit
  13462. * integer inputs with interleaved multiplication and
  13463. * shift-based quantization on the outputs.
  13464. * @param[in] in_vec pointer of the input vector
  13465. * @param[in] wt_mat pointer of the weight matrix
  13466. * @param[in] size number of elements in the input vector
  13467. * @param[in] wt_row_num number of rows in the weight matrix
  13468. * @param[in] bias_lshift left shift amount for the bias
  13469. * @param[in] out_rshift right shift amount for the output
  13470. * @param[in] bias pointer of the bias vector
  13471. * @param[out] out_vec pointer of the output vector
  13472. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13473. * required when -mext-vector is enabled and
  13474. * its size must be "2 * size".
  13475. * @return This function only returns 0.
  13476. *
  13477. * @note
  13478. * In this function, the input vector is multiplied by the weight matrix in
  13479. * interleaved formats which could be obtained by riscv_nn_fc_s8_wt_converter.
  13480. */
  13481. static inline int32_t hpm_nn_fc_s8_s8_s8_sft_bias_fast(const q7_t *in_vec,
  13482. const q7_t *wt_mat,
  13483. const uint16_t size,
  13484. const uint16_t wt_row_num,
  13485. const uint16_t bias_lshift,
  13486. const uint16_t out_rshift,
  13487. const q7_t *bias,
  13488. q7_t *out_vec,
  13489. q15_t *in_tmp_buf)
  13490. {
  13491. #if defined(__zcc__)
  13492. return tpt_nn_fc_s8_s8_s8_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
  13493. bias_lshift, out_rshift, bias,
  13494. out_vec, in_tmp_buf);
  13495. #else
  13496. return riscv_nn_fc_s8_s8_s8_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
  13497. bias_lshift, out_rshift, bias,
  13498. out_vec, in_tmp_buf);
  13499. #endif
  13500. }
  13501. /**
  13502. * @brief This is a fully connected layer function for signed 16-bit
  13503. * integer inputs with shift-based quantization on the outputs.
  13504. * @param[in] in_vec pointer of the input vector
  13505. * @param[in] wt_mat pointer of the weight matrix
  13506. * @param[in] size number of elements in the input vector
  13507. * @param[in] wt_row_num number of rows in the weight matrix
  13508. * @param[in] bias_lshift left shift amount for the bias
  13509. * @param[in] out_rshift right shift amount for the output
  13510. * @param[in] bias pointer of the bias
  13511. * @param[out] out_vec pointer of the output vector
  13512. * @param[in] tmp_buf dummy
  13513. * @return This function only returns 0.
  13514. */
  13515. static inline int32_t hpm_nn_fc_s16_s16_s16_sft_bias(const q15_t *in_vec,
  13516. const q15_t *wt_mat,
  13517. const uint16_t size,
  13518. const uint16_t wt_row_num,
  13519. const uint16_t bias_lshift,
  13520. const uint16_t out_rshift,
  13521. const q15_t *bias,
  13522. q15_t *out_vec,
  13523. q15_t *tmp_buf)
  13524. {
  13525. #if defined(__zcc__)
  13526. return tpt_nn_fc_s16_s16_s16_sft_bias(in_vec, wt_mat, size, wt_row_num,
  13527. bias_lshift, out_rshift, bias, out_vec,
  13528. tmp_buf);
  13529. #else
  13530. return riscv_nn_fc_s16_s16_s16_sft_bias(in_vec, wt_mat, size, wt_row_num,
  13531. bias_lshift, out_rshift, bias,
  13532. out_vec, tmp_buf);
  13533. #endif
  13534. }
  13535. /**
  13536. * @brief This is a fully connected layer function for signed 16-bit
  13537. * integer inputs with interleaved multiplication and
  13538. * shift-based quantization on the outputs.
  13539. * @param[in] in_vec pointer of the input vector
  13540. * @param[in] wt_mat pointer of the weight matrix
  13541. * @param[in] size number of elements in the input vector
  13542. * @param[in] wt_row_num number of rows in the weight matrix
  13543. * @param[in] bias_lshift left shift amount for the bias
  13544. * @param[in] out_rshift right shift amount for the output
  13545. * @param[in] bias pointer of the bias
  13546. * @param[out] out_vec pointer of the output vector
  13547. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13548. * required when -mext-vector is enabled and
  13549. * its size must be 4 * size.
  13550. * @return This function only returns 0.
  13551. *
  13552. *
  13553. * @note
  13554. * In this function, the input vector is multiplied by a weight matrix in
  13555. * interleaved formats which could be obtained by riscv_nn_fc_s16_wt_converter.
  13556. */
  13557. static inline int32_t hpm_nn_fc_s16_s16_s16_sft_bias_fast(const q15_t *in_vec,
  13558. const q15_t *wt_mat,
  13559. const uint16_t size,
  13560. const uint16_t wt_row_num,
  13561. const uint16_t bias_lshift,
  13562. const uint16_t out_rshift,
  13563. const q15_t *bias,
  13564. q15_t *out_vec,
  13565. q15_t *in_tmp_buf)
  13566. {
  13567. #if defined(__zcc__)
  13568. return tpt_nn_fc_s16_s16_s16_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
  13569. bias_lshift, out_rshift, bias,
  13570. out_vec, in_tmp_buf);
  13571. #else
  13572. return riscv_nn_fc_s16_s16_s16_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
  13573. bias_lshift, out_rshift, bias,
  13574. out_vec, in_tmp_buf);
  13575. #endif
  13576. }
  13577. /**
  13578. * @brief This function multiplies a signed 16-bit integer input
  13579. * vector by a signed 8-bit integer weight matrix with
  13580. * shift-based quantization on the outputs.
  13581. * @param[in] in_vec pointer of the input vector
  13582. * @param[in] wt_mat pointer of the weight matrix
  13583. * @param[in] size number of elements in the input vector
  13584. * @param[in] wt_row_num number of rows in the weight matrix
  13585. * @param[in] bias_lshift left shift amount for the bias
  13586. * @param[in] out_rshift right shift amount for the output
  13587. * @param[in] bias pointer of the bias
  13588. * @param[out] out_vec pointer of the output vector
  13589. * @param[in] tmp_buf dummy
  13590. * @return This function only returns 0.
  13591. */
  13592. static inline int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias(const q15_t *in_vec,
  13593. const q7_t *wt_mat,
  13594. const uint16_t size,
  13595. const uint16_t wt_row_num,
  13596. const uint16_t bias_lshift,
  13597. const uint16_t out_rshift,
  13598. const q7_t *bias,
  13599. q15_t *out_vec,
  13600. q15_t *tmp_buf)
  13601. {
  13602. #if defined(__zcc__)
  13603. return tpt_nn_fc_mat_vec_s16_s16_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
  13604. bias_lshift, out_rshift, bias,
  13605. out_vec, tmp_buf);
  13606. #else
  13607. return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias(
  13608. in_vec, wt_mat, size, wt_row_num, bias_lshift, out_rshift, bias, out_vec,
  13609. tmp_buf);
  13610. #endif
  13611. }
  13612. /**
  13613. * @brief This function multiplies a signed 16-bit integer input
  13614. * vector by a signed 8-bit integer weight matrix with
  13615. * interleaved multiplication and shift-based quantization on
  13616. * the outputs.
  13617. * @param[in] in_vec pointer of the input vector
  13618. * @param[in] wt_mat pointer of the weight matrix
  13619. * @param[in] size number of elements in the input vector
  13620. * @param[in] wt_row_num number of rows in the weight matrix
  13621. * @param[in] bias_lshift left shift amount for the bias
  13622. * @param[in] out_rshift right shift amount for the output
  13623. * @param[in] bias pointer of the bias
  13624. * @param[out] out_vec pointer of the output vector
  13625. * @param[in] tmp_buf dummy
  13626. * @return This function only returns 0.
  13627. *
  13628. * @note
  13629. * In this function, the input vector is multiplied by a weight matrix in
  13630. * interleaved formats which could be obtained by
  13631. * hpm_nn_fc_mat_vec_s8_wt_converter.
  13632. */
  13633. static inline int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(const q15_t *in_vec,
  13634. const q7_t *wt_mat,
  13635. const uint16_t size,
  13636. const uint16_t wt_row_num,
  13637. const uint16_t bias_lshift,
  13638. const uint16_t out_rshift,
  13639. const q7_t *bias,
  13640. q15_t *out_vec,
  13641. q15_t *tmp_buf)
  13642. {
  13643. #if defined(__zcc__)
  13644. return tpt_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
  13645. in_vec, wt_mat, size, wt_row_num, bias_lshift, out_rshift, bias, out_vec,
  13646. tmp_buf);
  13647. #else
  13648. return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
  13649. in_vec, wt_mat, size, wt_row_num, bias_lshift, out_rshift, bias, out_vec,
  13650. tmp_buf);
  13651. #endif
  13652. }
  13653. /**
  13654. * @brief This is a fully connected layer function for signed 8-bit
  13655. * integer inputs/outputs with bias inputs and symmetric
  13656. * quantization on the outputs.
  13657. * @param[in] in_vec pointer of the input vector
  13658. * @param[in] wt_mat pointer of the weight matrix
  13659. * @param[in] size number of elements in the input vector
  13660. * @param[in] wt_row_num number of rows in the weight matrix
  13661. * @param[in] pre_rshift right shift amount for the output before the
  13662. * scaling
  13663. * @param[in] out_scale scaling value for the output
  13664. * @param[in] post_rshift right shift amount for the output after the
  13665. * scaling
  13666. * @param[in] bias pointer of the bias vector
  13667. * @param[out] out_vec pointer of the output vector
  13668. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13669. * required when -mext-dsp is enabled and its
  13670. * size must be "size".
  13671. * @return This function only returns 0.
  13672. *
  13673. * @note
  13674. * The outputs will be two-stage shifted before being stored, i.e.,
  13675. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  13676. */
  13677. static inline int32_t hpm_nn_fc_s8_s8_s8_sym_bias(const q7_t *in_vec,
  13678. const q7_t *wt_mat,
  13679. const uint16_t size,
  13680. const uint16_t wt_row_num,
  13681. const uint16_t pre_rshift,
  13682. const uint16_t out_scale,
  13683. const uint16_t post_rshift,
  13684. const q31_t *bias,
  13685. q7_t *out_vec,
  13686. q15_t *in_tmp_buf)
  13687. {
  13688. #if defined(__zcc__)
  13689. return tpt_nn_fc_s8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13690. pre_rshift, out_scale, post_rshift, bias,
  13691. out_vec, in_tmp_buf);
  13692. #else
  13693. return riscv_nn_fc_s8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13694. pre_rshift, out_scale, post_rshift, bias,
  13695. out_vec, in_tmp_buf);
  13696. #endif
  13697. }
  13698. /**
  13699. * @brief This is a fully connected layer function for signed 8-bit
  13700. * integer inputs and signed 16-bit integer outputs with bias
  13701. * inputs and symmetric quantization on the outputs.
  13702. * @param[in] in_vec pointer of the input vector
  13703. * @param[in] wt_mat pointer of the weight matrix
  13704. * @param[in] size number of elements in the input vector
  13705. * @param[in] wt_row_num number of rows in the weight matrix
  13706. * @param[in] pre_rshift right shift amount for the output before the
  13707. * scaling
  13708. * @param[in] out_scale scaling value for the output
  13709. * @param[in] post_rshift right shift amount for the output after the
  13710. * scaling
  13711. * @param[in] bias pointer of the bias vector
  13712. * @param[out] out_vec pointer of the output vector
  13713. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13714. * required when -mext-dsp is enabled and its
  13715. * size must be "size".
  13716. * @return This function only returns 0.
  13717. *
  13718. * @note
  13719. * The outputs will be two-stage shifted before being stored, i.e.,
  13720. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  13721. */
  13722. static inline int32_t hpm_nn_fc_s8_s16_s8_sym_bias(const q7_t *in_vec,
  13723. const q7_t *wt_mat,
  13724. const uint16_t size,
  13725. const uint16_t wt_row_num,
  13726. const uint16_t pre_rshift,
  13727. const uint16_t out_scale,
  13728. const uint16_t post_rshift,
  13729. const q31_t *bias,
  13730. q15_t *out_vec,
  13731. q15_t *in_tmp_buf)
  13732. {
  13733. #if defined(__zcc__)
  13734. return tpt_nn_fc_s8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13735. pre_rshift, out_scale, post_rshift, bias,
  13736. out_vec, in_tmp_buf);
  13737. #else
  13738. return riscv_nn_fc_s8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13739. pre_rshift, out_scale, post_rshift,
  13740. bias, out_vec, in_tmp_buf);
  13741. #endif
  13742. }
  13743. /**
  13744. * @brief This is a fully connected layer function for unsigned 8-bit
  13745. * integer inputs/outputs with bias inputs and symmetric
  13746. * quantization on the outputs.
  13747. * @param[in] in_vec pointer of the input vector
  13748. * @param[in] wt_mat pointer of the weight matrix
  13749. * @param[in] size number of elements in the input vector
  13750. * @param[in] wt_row_num number of rows in the weight matrix
  13751. * @param[in] pre_rshift right shift amount for the output before the
  13752. * scaling
  13753. * @param[in] out_scale scaling value for the output
  13754. * @param[in] post_rshift right shift amount for the output after the
  13755. * scaling
  13756. * @param[in] bias pointer of the bias vector
  13757. * @param[out] out_vec pointer of the output vector
  13758. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13759. * required when -mext-dsp is enabled and its
  13760. * size must be "size".
  13761. * @return This function only returns 0.
  13762. *
  13763. * @note
  13764. * The outputs will be two-stage shifted before being stored, i.e.,
  13765. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  13766. */
  13767. static inline int32_t hpm_nn_fc_u8_u8_s8_sym_bias(const u8_t *in_vec,
  13768. const q7_t *wt_mat,
  13769. const uint16_t size,
  13770. const uint16_t wt_row_num,
  13771. const uint16_t pre_rshift,
  13772. const uint16_t out_scale,
  13773. const uint16_t post_rshift,
  13774. const q31_t *bias,
  13775. u8_t *out_vec,
  13776. q15_t *in_tmp_buf)
  13777. {
  13778. #if defined(__zcc__)
  13779. return tpt_nn_fc_u8_u8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13780. pre_rshift, out_scale, post_rshift, bias,
  13781. out_vec, in_tmp_buf);
  13782. #else
  13783. return riscv_nn_fc_u8_u8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13784. pre_rshift, out_scale, post_rshift, bias,
  13785. out_vec, in_tmp_buf);
  13786. #endif
  13787. }
  13788. /**
  13789. * @brief This is a fully connected layer function for unsigned 8-bit
  13790. * integer inputs and signed 8-bit integer outputs with bias
  13791. * inputs and symmetric quantization on the outputs.
  13792. * @param[in] in_vec pointer of the input vector
  13793. * @param[in] wt_mat pointer of the weight matrix
  13794. * @param[in] size number of elements in the input vector
  13795. * @param[in] wt_row_num number of rows in the weight matrix
  13796. * @param[in] pre_rshift right shift amount for the output before the
  13797. * scaling
  13798. * @param[in] out_scale scaling value for the output
  13799. * @param[in] post_rshift right shift amount for the output after the
  13800. * scaling
  13801. * @param[in] bias pointer of the bias vector
  13802. * @param[out] out_vec pointer of the output vector
  13803. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13804. * required when -mext-dsp is enabled and its
  13805. * size must be "size".
  13806. * @return This function only returns 0.
  13807. *
  13808. * @note
  13809. * The outputs will be two-stage shifted before being stored, i.e.,
  13810. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  13811. */
  13812. static inline int32_t hpm_nn_fc_u8_s8_s8_sym_bias(const u8_t *in_vec,
  13813. const q7_t *wt_mat,
  13814. const uint16_t size,
  13815. const uint16_t wt_row_num,
  13816. const uint16_t pre_rshift,
  13817. const uint16_t out_scale,
  13818. const uint16_t post_rshift,
  13819. const q31_t *bias,
  13820. q7_t *out_vec,
  13821. q15_t *in_tmp_buf)
  13822. {
  13823. #if defined(__zcc__)
  13824. return tpt_nn_fc_u8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13825. pre_rshift, out_scale, post_rshift, bias,
  13826. out_vec, in_tmp_buf);
  13827. #else
  13828. return riscv_nn_fc_u8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13829. pre_rshift, out_scale, post_rshift, bias,
  13830. out_vec, in_tmp_buf);
  13831. #endif
  13832. }
  13833. /**
  13834. * @brief This is a fully connected layer function for unsigned 8-bit
  13835. * integer inputs and signed 16-bit integer outputs with bias
  13836. * inputs and symmetric quantization on the outputs.
  13837. * @param[in] in_vec pointer of the input vector
  13838. * @param[in] wt_mat pointer of the weight matrix
  13839. * @param[in] size number of elements in the input vector
  13840. * @param[in] wt_row_num number of rows in the weight matrix
  13841. * @param[in] pre_rshift right shift amount for the output before the
  13842. * scaling
  13843. * @param[in] out_scale scaling value for the output
  13844. * @param[in] post_rshift right shift amount for the output after the
  13845. * scaling
  13846. * @param[in] bias pointer of the bias vector
  13847. * @param[out] out_vec pointer of the output vector
  13848. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13849. * required when -mext-dsp is enabled and its
  13850. * size must be "size".
  13851. * @return This function only returns 0.
  13852. *
  13853. * @note
  13854. * The outputs will be two-stage shifted before being stored, i.e.,
  13855. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  13856. */
  13857. static inline int32_t hpm_nn_fc_u8_s16_s8_sym_bias(const u8_t *in_vec,
  13858. const q7_t *wt_mat,
  13859. const uint16_t size,
  13860. const uint16_t wt_row_num,
  13861. const uint16_t pre_rshift,
  13862. const uint16_t out_scale,
  13863. const uint16_t post_rshift,
  13864. const q31_t *bias,
  13865. q15_t *out_vec,
  13866. q15_t *in_tmp_buf)
  13867. {
  13868. #if defined(__zcc__)
  13869. return tpt_nn_fc_u8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13870. pre_rshift, out_scale, post_rshift, bias,
  13871. out_vec, in_tmp_buf);
  13872. #else
  13873. return riscv_nn_fc_u8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
  13874. pre_rshift, out_scale, post_rshift,
  13875. bias, out_vec, in_tmp_buf);
  13876. #endif
  13877. }
  13878. /**
  13879. * @brief This is a fully connected layer function for signed 8-bit
  13880. * integer inputs/outputs with symmetric quantization on the
  13881. * outputs.
  13882. * @param[in] in_vec pointer of the input vector
  13883. * @param[in] wt_mat pointer of the weight matrix
  13884. * @param[in] size number of elements in the input vector
  13885. * @param[in] wt_row_num number of rows in the weight matrix
  13886. * @param[in] pre_rshift right shift amount for the output before the
  13887. * scaling
  13888. * @param[in] out_scale scaling value for the output
  13889. * @param[in] post_rshift right shift amount for the output after the
  13890. * scaling
  13891. * @param[out] out_vec pointer of the output vector
  13892. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13893. * required when -mext-dsp is enabled and its
  13894. * size must be "size".
  13895. * @return This function only returns 0.
  13896. *
  13897. * @note
  13898. * The outputs will be two-stage shifted before being stored, i.e.,
  13899. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  13900. */
  13901. static inline int32_t hpm_nn_fc_s8_s8_s8_sym(const q7_t *in_vec,
  13902. const q7_t *wt_mat,
  13903. const uint16_t size,
  13904. const uint16_t wt_row_num,
  13905. const uint16_t pre_rshift,
  13906. const uint16_t out_scale,
  13907. const uint16_t post_rshift,
  13908. q7_t *out_vec,
  13909. q15_t *in_tmp_buf)
  13910. {
  13911. #if defined(__zcc__)
  13912. return tpt_nn_fc_s8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  13913. out_scale, post_rshift, out_vec, in_tmp_buf);
  13914. #else
  13915. return riscv_nn_fc_s8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  13916. out_scale, post_rshift, out_vec, in_tmp_buf);
  13917. #endif
  13918. }
  13919. /**
  13920. * @brief This is a fully connected layer function for signed 8-bit
  13921. * integer inputs and signed 16-bit integer outputs with
  13922. * symmetric quantization on the outputs.
  13923. * @param[in] in_vec pointer of the input vector
  13924. * @param[in] wt_mat pointer of the weight matrix
  13925. * @param[in] size number of elements in the input vector
  13926. * @param[in] wt_row_num number of rows in the weight matrix
  13927. * @param[in] pre_rshift right shift amount for the output before the
  13928. * scaling
  13929. * @param[in] out_scale scaling value for the output
  13930. * @param[in] post_rshift right shift amount for the output after the
  13931. * scaling
  13932. * @param[out] out_vec pointer of the output vector
  13933. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13934. * required when -mext-dsp is enabled and its
  13935. * size must be "size".
  13936. * @return This function only returns 0.
  13937. *
  13938. * @note
  13939. * The outputs will be two-stage shifted before being stored, i.e.,
  13940. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  13941. */
  13942. static inline int32_t hpm_nn_fc_s8_s16_s8_sym(const q7_t *in_vec,
  13943. const q7_t *wt_mat,
  13944. const uint16_t size,
  13945. const uint16_t wt_row_num,
  13946. const uint16_t pre_rshift,
  13947. const uint16_t out_scale,
  13948. const uint16_t post_rshift,
  13949. q15_t *out_vec,
  13950. q15_t *in_tmp_buf)
  13951. {
  13952. #if defined(__zcc__)
  13953. return tpt_nn_fc_s8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  13954. out_scale, post_rshift, out_vec, in_tmp_buf);
  13955. #else
  13956. return riscv_nn_fc_s8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  13957. out_scale, post_rshift, out_vec, in_tmp_buf);
  13958. #endif
  13959. }
  13960. /**
  13961. * @brief This is a fully connected layer function for unsigned 8-bit
  13962. * integer inputs/outputs with symmetric quantization on the
  13963. * outputs.
  13964. * @param[in] in_vec pointer of the input vector
  13965. * @param[in] wt_mat pointer of the weight matrix
  13966. * @param[in] size number of elements in the input vector
  13967. * @param[in] wt_row_num number of rows in the weight matrix
  13968. * @param[in] pre_rshift right shift amount for the output before the
  13969. * scaling
  13970. * @param[in] out_scale scaling value for the output
  13971. * @param[in] post_rshift right shift amount for the output after the
  13972. * scaling
  13973. * @param[out] out_vec pointer of the output vector
  13974. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  13975. * required when -mext-dsp is enabled and its
  13976. * size must be "size".
  13977. * @return This function only returns 0.
  13978. *
  13979. * @note
  13980. * The outputs will be two-stage shifted before being stored, i.e.,
  13981. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  13982. */
  13983. static inline int32_t hpm_nn_fc_u8_u8_s8_sym(const u8_t *in_vec,
  13984. const q7_t *wt_mat,
  13985. const uint16_t size,
  13986. const uint16_t wt_row_num,
  13987. const uint16_t pre_rshift,
  13988. const uint16_t out_scale,
  13989. const uint16_t post_rshift,
  13990. u8_t *out_vec,
  13991. q15_t *in_tmp_buf)
  13992. {
  13993. #if defined(__zcc__)
  13994. return tpt_nn_fc_u8_u8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  13995. out_scale, post_rshift, out_vec, in_tmp_buf);
  13996. #else
  13997. return riscv_nn_fc_u8_u8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  13998. out_scale, post_rshift, out_vec, in_tmp_buf);
  13999. #endif
  14000. }
  14001. /**
  14002. * @brief This is a fully connected layer function for unsigned 8-bit
  14003. * integer inputs and signed 8-bit integer outputs with
  14004. * symmetric quantization on the outputs.
  14005. * @param[in] in_vec pointer of the input vector
  14006. * @param[in] wt_mat pointer of the weight matrix
  14007. * @param[in] size number of elements in the input vector
  14008. * @param[in] wt_row_num number of rows in the weight matrix
  14009. * @param[in] pre_rshift right shift amount for the output before the
  14010. * scaling
  14011. * @param[in] out_scale scaling value for the output
  14012. * @param[in] post_rshift right shift amount for the output after the
  14013. * scaling
  14014. * @param[out] out_vec pointer of the output vector
  14015. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14016. * required when -mext-dsp is enabled and its
  14017. * size must be "size".
  14018. * @return This function only returns 0.
  14019. *
  14020. * @note
  14021. * The outputs will be two-stage shifted before being stored, i.e.,
  14022. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14023. */
  14024. static inline int32_t hpm_nn_fc_u8_s8_s8_sym(const u8_t *in_vec,
  14025. const q7_t *wt_mat,
  14026. const uint16_t size,
  14027. const uint16_t wt_row_num,
  14028. const uint16_t pre_rshift,
  14029. const uint16_t out_scale,
  14030. const uint16_t post_rshift,
  14031. q7_t *out_vec,
  14032. q15_t *in_tmp_buf)
  14033. {
  14034. #if defined(__zcc__)
  14035. return tpt_nn_fc_u8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  14036. out_scale, post_rshift, out_vec, in_tmp_buf);
  14037. #else
  14038. return riscv_nn_fc_u8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  14039. out_scale, post_rshift, out_vec, in_tmp_buf);
  14040. #endif
  14041. }
  14042. /**
  14043. * @brief This is a fully connected layer function for unsigned 8-bit
  14044. * integer inputs and signed 16-bit integer outputs with
  14045. * symmetric quantization on the outputs.
  14046. * @param[in] in_vec pointer of the input vector
  14047. * @param[in] wt_mat pointer of the weight matrix
  14048. * @param[in] size number of elements in the input vector
  14049. * @param[in] wt_row_num number of rows in the weight matrix
  14050. * @param[in] pre_rshift right shift amount for the output before the
  14051. * scaling
  14052. * @param[in] out_scale scaling value for the output
  14053. * @param[in] post_rshift right shift amount for the output after the
  14054. * scaling
  14055. * @param[out] out_vec pointer of the output vector
  14056. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14057. * required when -mext-dsp is enabled and its
  14058. * size must be "size".
  14059. * @return This function only returns 0.
  14060. *
  14061. * @note
  14062. * The outputs will be two-stage shifted before being stored, i.e.,
  14063. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14064. */
  14065. static inline int32_t hpm_nn_fc_u8_s16_s8_sym(const u8_t *in_vec,
  14066. const q7_t *wt_mat,
  14067. const uint16_t size,
  14068. const uint16_t wt_row_num,
  14069. const uint16_t pre_rshift,
  14070. const uint16_t out_scale,
  14071. const uint16_t post_rshift,
  14072. q15_t *out_vec,
  14073. q15_t *in_tmp_buf)
  14074. {
  14075. #if defined(__zcc__)
  14076. return tpt_nn_fc_u8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  14077. out_scale, post_rshift, out_vec, in_tmp_buf);
  14078. #else
  14079. return riscv_nn_fc_u8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
  14080. out_scale, post_rshift, out_vec, in_tmp_buf);
  14081. #endif
  14082. }
  14083. /**
  14084. * @brief This is a fully connected layer function for signed 8-bit
  14085. * integer inputs/outputs with bias inputs, interleaved
  14086. * multiplication and symmetric quantization on the outputs.
  14087. * @param[in] in_vec pointer of the input vector
  14088. * @param[in] wt_mat pointer of the weight matrix
  14089. * @param[in] size number of elements in the input vector
  14090. * @param[in] wt_row_num number of rows in the weight matrix
  14091. * @param[in] pre_rshift right shift amount for the output
  14092. * @param[in] out_scale value of scaling for the output
  14093. * @param[in] post_rshift right shift amount for the output
  14094. * @param[in] bias pointer of the bias vector
  14095. * @param[out] out_vec pointer of the output vector
  14096. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14097. * required when -mext-vector is enabled and
  14098. * its size must be 2 * size.
  14099. * @return This function only returns 0.
  14100. *
  14101. * @note
  14102. * - In this function, the input vector is multiplied by the weight matrix in
  14103. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14104. * - The outputs will be two-stage shifted before being stored, i.e.,
  14105. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14106. */
  14107. static inline int32_t hpm_nn_fc_s8_s8_s8_sym_bias_fast(const q7_t *in_vec,
  14108. const q7_t *wt_mat,
  14109. const uint16_t size,
  14110. const uint16_t wt_row_num,
  14111. const uint16_t pre_rshift,
  14112. const uint16_t out_scale,
  14113. const uint16_t post_rshift,
  14114. const q31_t *bias,
  14115. q7_t *out_vec,
  14116. q15_t *in_tmp_buf)
  14117. {
  14118. #if defined(__zcc__)
  14119. return tpt_nn_fc_s8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14120. pre_rshift, out_scale, post_rshift,
  14121. bias, out_vec, in_tmp_buf);
  14122. #else
  14123. return riscv_nn_fc_s8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14124. pre_rshift, out_scale, post_rshift,
  14125. bias, out_vec, in_tmp_buf);
  14126. #endif
  14127. }
  14128. /**
  14129. * @brief This is a fully connected layer function for signed 8-bit
  14130. * integer inputs and signed 16-bit integer outputs with bias
  14131. * inputs, interleaved multiplication and symmetric
  14132. * quantization on the outputs.
  14133. * @param[in] in_vec pointer of the input vector
  14134. * @param[in] wt_mat pointer of the weight matrix
  14135. * @param[in] size number of elements in the input vector
  14136. * @param[in] wt_row_num number of rows in the weight matrix
  14137. * @param[in] pre_rshift right shift amount for the output
  14138. * @param[in] out_scale value of scaling for the output
  14139. * @param[in] post_rshift right shift amount for the output
  14140. * @param[in] bias pointer of the bias vector
  14141. * @param[out] out_vec pointer of the output vector
  14142. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14143. * required when -mext-vector is enabled and
  14144. * its size must be 2 * size.
  14145. * @return This function only returns 0.
  14146. *
  14147. * @note
  14148. * - In this function, the input vector is multiplied by the weight matrix in
  14149. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14150. * - The outputs will be two-stage shifted before being stored, i.e.,
  14151. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14152. */
  14153. static inline int32_t hpm_nn_fc_s8_s16_s8_sym_bias_fast(const q7_t *in_vec,
  14154. const q7_t *wt_mat,
  14155. const uint16_t size,
  14156. const uint16_t wt_row_num,
  14157. const uint16_t pre_rshift,
  14158. const uint16_t out_scale,
  14159. const uint16_t post_rshift,
  14160. const q31_t *bias,
  14161. q15_t *out_vec,
  14162. q15_t *in_tmp_buf)
  14163. {
  14164. #if defined(__zcc__)
  14165. return tpt_nn_fc_s8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14166. pre_rshift, out_scale, post_rshift,
  14167. bias, out_vec, in_tmp_buf);
  14168. #else
  14169. return riscv_nn_fc_s8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14170. pre_rshift, out_scale, post_rshift,
  14171. bias, out_vec, in_tmp_buf);
  14172. #endif
  14173. }
  14174. /**
  14175. * @brief This is a fully connected layer function for unsigned 8-bit
  14176. * integer inputs/outputs with bias inputs, interleaved
  14177. * multiplication and symmetric quantization on the outputs.
  14178. * @param[in] in_vec pointer of the input vector
  14179. * @param[in] wt_mat pointer of the weight matrix
  14180. * @param[in] size number of elements in the input vector
  14181. * @param[in] wt_row_num number of rows in the weight matrix
  14182. * @param[in] pre_rshift right shift amount for the output
  14183. * @param[in] out_scale value of scaling for the output
  14184. * @param[in] post_rshift right shift amount for the output
  14185. * @param[in] bias pointer of the bias vector
  14186. * @param[out] out_vec pointer of the output vector
  14187. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14188. * required when -mext-vector is enabled and
  14189. * its size must be 2 * size.
  14190. * @return This function only returns 0.
  14191. *
  14192. * @note
  14193. * - In this function, the input vector is multiplied by the weight matrix in
  14194. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14195. * - The outputs will be two-stage shifted before being stored, i.e.,
  14196. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14197. */
  14198. static inline int32_t hpm_nn_fc_u8_u8_s8_sym_bias_fast(const u8_t *in_vec,
  14199. const q7_t *wt_mat,
  14200. const uint16_t size,
  14201. const uint16_t wt_row_num,
  14202. const uint16_t pre_rshift,
  14203. const uint16_t out_scale,
  14204. const uint16_t post_rshift,
  14205. const q31_t *bias,
  14206. u8_t *out_vec,
  14207. q15_t *in_tmp_buf)
  14208. {
  14209. #if defined(__zcc__)
  14210. return tpt_nn_fc_u8_u8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14211. pre_rshift, out_scale, post_rshift,
  14212. bias, out_vec, in_tmp_buf);
  14213. #else
  14214. return riscv_nn_fc_u8_u8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14215. pre_rshift, out_scale, post_rshift,
  14216. bias, out_vec, in_tmp_buf);
  14217. #endif
  14218. }
  14219. /**
  14220. * @brief This is a fully connected layer function for unsigned 8-bit
  14221. * integer inputs and signed 8-bit integer outputs with bias
  14222. * inputs, interleaved multiplication and symmetric
  14223. * quantization on the outputs.
  14224. * @param[in] in_vec pointer of the input vector
  14225. * @param[in] wt_mat pointer of the weight matrix
  14226. * @param[in] size number of elements in the input vector
  14227. * @param[in] wt_row_num number of rows in the weight matrix
  14228. * @param[in] pre_rshift right shift amount for the output
  14229. * @param[in] out_scale value of scaling for the output
  14230. * @param[in] post_rshift right shift amount for the output
  14231. * @param[in] bias pointer of the bias vector
  14232. * @param[out] out_vec pointer of the output vector
  14233. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14234. * required when -mext-vector is enabled and
  14235. * its size must be 2 * size.
  14236. * @return This function only returns 0.
  14237. *
  14238. * @note
  14239. * - In this function, the input vector is multiplied by the weight matrix in
  14240. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14241. * - The outputs will be two-stage shifted before being stored, i.e.,
  14242. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14243. */
  14244. static inline int32_t hpm_nn_fc_u8_s8_s8_sym_bias_fast(const u8_t *in_vec,
  14245. const q7_t *wt_mat,
  14246. const uint16_t size,
  14247. const uint16_t wt_row_num,
  14248. const uint16_t pre_rshift,
  14249. const uint16_t out_scale,
  14250. const uint16_t post_rshift,
  14251. const q31_t *bias,
  14252. q7_t *out_vec,
  14253. q15_t *in_tmp_buf)
  14254. {
  14255. #if defined(__zcc__)
  14256. return tpt_nn_fc_u8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14257. pre_rshift, out_scale, post_rshift,
  14258. bias, out_vec, in_tmp_buf);
  14259. #else
  14260. return riscv_nn_fc_u8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14261. pre_rshift, out_scale, post_rshift,
  14262. bias, out_vec, in_tmp_buf);
  14263. #endif
  14264. }
  14265. /**
  14266. * @brief This is a fully connected layer function for unsigned 8-bit
  14267. * integer inputs and signed 16-bit integer outputs with bias
  14268. * inputs, interleaved multiplication and symmetric
  14269. * quantization on the outputs.
  14270. * @param[in] in_vec pointer of the input vector
  14271. * @param[in] wt_mat pointer of the weight matrix
  14272. * @param[in] size number of elements in the input vector
  14273. * @param[in] wt_row_num number of rows in the weight matrix
  14274. * @param[in] pre_rshift right shift amount for the output
  14275. * @param[in] out_scale value of scaling for the output
  14276. * @param[in] post_rshift right shift amount for the output
  14277. * @param[in] bias pointer of the bias vector
  14278. * @param[out] out_vec pointer of the output vector
  14279. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14280. * required when -mext-vector is enabled and
  14281. * its size must be 2 * size.
  14282. * @return This function only returns 0.
  14283. *
  14284. * @note
  14285. * - In this function, the input vector is multiplied by the weight matrix in
  14286. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14287. * - The outputs will be two-stage shifted before being stored, i.e.,
  14288. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14289. */
  14290. static inline int32_t hpm_nn_fc_u8_s16_s8_sym_bias_fast(const u8_t *in_vec,
  14291. const q7_t *wt_mat,
  14292. const uint16_t size,
  14293. const uint16_t wt_row_num,
  14294. const uint16_t pre_rshift,
  14295. const uint16_t out_scale,
  14296. const uint16_t post_rshift,
  14297. const q31_t *bias,
  14298. q15_t *out_vec,
  14299. q15_t *in_tmp_buf)
  14300. {
  14301. #if defined(__zcc__)
  14302. return tpt_nn_fc_u8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14303. pre_rshift, out_scale, post_rshift,
  14304. bias, out_vec, in_tmp_buf);
  14305. #else
  14306. return riscv_nn_fc_u8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
  14307. pre_rshift, out_scale, post_rshift,
  14308. bias, out_vec, in_tmp_buf);
  14309. #endif
  14310. }
  14311. /**
  14312. * @brief This is a fully connected layer function for signed 8-bit
  14313. * integer inputs/outputs with interleaved multiplication and
  14314. * symmetric quantization on the outputs.
  14315. * @param[in] in_vec pointer of the input vector
  14316. * @param[in] wt_mat pointer of the weight matrix
  14317. * @param[in] size number of elements in the input vector
  14318. * @param[in] wt_row_num number of rows in the weight matrix
  14319. * @param[in] pre_rshift right shift amount for the output
  14320. * @param[in] out_scale value of scaling for the output
  14321. * @param[in] post_rshift right shift amount for the output
  14322. * @param[out] out_vec pointer of the output vector
  14323. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14324. * required when -mext-vector is enabled and
  14325. * its size must be 2 * size.
  14326. * @return This function only returns 0.
  14327. *
  14328. * @note
  14329. * - In this function, the input vector is multiplied by the weight matrix in
  14330. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14331. * - The outputs will be two-stage shifted before being stored, i.e.,
  14332. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14333. */
  14334. static inline int32_t hpm_nn_fc_s8_s8_s8_sym_fast(const q7_t *in_vec,
  14335. const q7_t *wt_mat,
  14336. const uint16_t size,
  14337. const uint16_t wt_row_num,
  14338. const uint16_t pre_rshift,
  14339. const uint16_t out_scale,
  14340. const uint16_t post_rshift,
  14341. q7_t *out_vec,
  14342. q15_t *in_tmp_buf)
  14343. {
  14344. #if defined(__zcc__)
  14345. return tpt_nn_fc_s8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14346. pre_rshift, out_scale, post_rshift,
  14347. out_vec, in_tmp_buf);
  14348. #else
  14349. return riscv_nn_fc_s8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14350. pre_rshift, out_scale, post_rshift,
  14351. out_vec, in_tmp_buf);
  14352. #endif
  14353. }
  14354. /**
  14355. * @brief This is a fully connected layer function for signed 8-bit
  14356. * integer inputs and signed 16-bit integer outputs with
  14357. * interleaved multiplication and symmetric quantization on the
  14358. * outputs.
  14359. * @param[in] in_vec pointer of the input vector
  14360. * @param[in] wt_mat pointer of the weight matrix
  14361. * @param[in] size number of elements in the input vector
  14362. * @param[in] wt_row_num number of rows in the weight matrix
  14363. * @param[in] pre_rshift right shift amount for the output
  14364. * @param[in] out_scale value of scaling for the output
  14365. * @param[in] post_rshift right shift amount for the output
  14366. * @param[out] out_vec pointer of the output vector
  14367. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14368. * required when -mext-vector is enabled and
  14369. * its size must be 2 * size.
  14370. * @return This function only returns 0.
  14371. *
  14372. * @note
  14373. * - In this function, the input vector is multiplied by the weight matrix in
  14374. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14375. * - The outputs will be two-stage shifted before being stored, i.e.,
  14376. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14377. */
  14378. static inline int32_t hpm_nn_fc_s8_s16_s8_sym_fast(const q7_t *in_vec,
  14379. const q7_t *wt_mat,
  14380. const uint16_t size,
  14381. const uint16_t wt_row_num,
  14382. const uint16_t pre_rshift,
  14383. const uint16_t out_scale,
  14384. const uint16_t post_rshift,
  14385. q15_t *out_vec,
  14386. q15_t *in_tmp_buf)
  14387. {
  14388. #if defined(__zcc__)
  14389. return tpt_nn_fc_s8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14390. pre_rshift, out_scale, post_rshift,
  14391. out_vec, in_tmp_buf);
  14392. #else
  14393. return riscv_nn_fc_s8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14394. pre_rshift, out_scale, post_rshift,
  14395. out_vec, in_tmp_buf);
  14396. #endif
  14397. }
  14398. /**
  14399. * @brief This is a fully connected layer function for unsigned 8-bit
  14400. * integer inputs/outputs with interleaved multiplication and
  14401. * symmetric quantization on the outputs.
  14402. * @param[in] in_vec pointer of the input vector
  14403. * @param[in] wt_mat pointer of the weight matrix
  14404. * @param[in] size number of elements in the input vector
  14405. * @param[in] wt_row_num number of rows in the weight matrix
  14406. * @param[in] pre_rshift right shift amount for the output
  14407. * @param[in] out_scale value of scaling for the output
  14408. * @param[in] post_rshift right shift amount for the output
  14409. * @param[out] out_vec pointer of the output vector
  14410. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14411. * required when -mext-vector is enabled and
  14412. * its size must be 2 * size.
  14413. * @return This function only returns 0.
  14414. *
  14415. * @note
  14416. * - In this function, the input vector is multiplied by the weight matrix in
  14417. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14418. * - The outputs will be two-stage shifted before being stored, i.e.,
  14419. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14420. */
  14421. static inline int32_t hpm_nn_fc_u8_u8_s8_sym_fast(const u8_t *in_vec,
  14422. const q7_t *wt_mat,
  14423. const uint16_t size,
  14424. const uint16_t wt_row_num,
  14425. const uint16_t pre_rshift,
  14426. const uint16_t out_scale,
  14427. const uint16_t post_rshift,
  14428. u8_t *out_vec,
  14429. q15_t *in_tmp_buf)
  14430. {
  14431. #if defined(__zcc__)
  14432. return tpt_nn_fc_u8_u8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14433. pre_rshift, out_scale, post_rshift,
  14434. out_vec, in_tmp_buf);
  14435. #else
  14436. return riscv_nn_fc_u8_u8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14437. pre_rshift, out_scale, post_rshift,
  14438. out_vec, in_tmp_buf);
  14439. #endif
  14440. }
  14441. /**
  14442. * @brief This is a fully connected layer function for unsigned 8-bit
  14443. * integer inputs and signed 8-bit integer outputs with
  14444. * interleaved multiplication and symmetric quantization on the
  14445. * outputs.
  14446. * @param[in] in_vec pointer of the input vector
  14447. * @param[in] wt_mat pointer of the weight matrix
  14448. * @param[in] size number of elements in the input vector
  14449. * @param[in] wt_row_num number of rows in the weight matrix
  14450. * @param[in] pre_rshift right shift amount for the output
  14451. * @param[in] out_scale value of scaling for the output
  14452. * @param[in] post_rshift right shift amount for the output
  14453. * @param[out] out_vec pointer of the output vector
  14454. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14455. * required when -mext-vector is enabled and
  14456. * its size must be 2 * size.
  14457. * @return This function only returns 0.
  14458. *
  14459. * @note
  14460. * - In this function, the input vector is multiplied by the weight matrix in
  14461. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14462. * - The outputs will be two-stage shifted before being stored, i.e.,
  14463. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14464. */
  14465. static inline int32_t hpm_nn_fc_u8_s8_s8_sym_fast(const u8_t *in_vec,
  14466. const q7_t *wt_mat,
  14467. const uint16_t size,
  14468. const uint16_t wt_row_num,
  14469. const uint16_t pre_rshift,
  14470. const uint16_t out_scale,
  14471. const uint16_t post_rshift,
  14472. q7_t *out_vec,
  14473. q15_t *in_tmp_buf)
  14474. {
  14475. #if defined(__zcc__)
  14476. return tpt_nn_fc_u8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14477. pre_rshift, out_scale, post_rshift,
  14478. out_vec, in_tmp_buf);
  14479. #else
  14480. return riscv_nn_fc_u8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14481. pre_rshift, out_scale, post_rshift,
  14482. out_vec, in_tmp_buf);
  14483. #endif
  14484. }
  14485. /**
  14486. * @brief This is a fully connected layer function for unsigned 8-bit
  14487. * integer inputs and signed 16-bit integer outputs with
  14488. * interleaved multiplication and symmetric quantization on the
  14489. * outputs.
  14490. * @param[in] in_vec pointer of the input vector
  14491. * @param[in] wt_mat pointer of the weight matrix
  14492. * @param[in] size number of elements in the input vector
  14493. * @param[in] wt_row_num number of rows in the weight matrix
  14494. * @param[in] pre_rshift right shift amount for the output
  14495. * @param[in] out_scale value of scaling for the output
  14496. * @param[in] post_rshift right shift amount for the output
  14497. * @param[out] out_vec pointer of the output vector
  14498. * @param[in] in_tmp_buf temporary buffer for input vector. It is
  14499. * required when -mext-vector is enabled and
  14500. * its size must be 2 * size.
  14501. * @return This function only returns 0.
  14502. *
  14503. * @note
  14504. * - In this function, the input vector is multiplied by the weight matrix in
  14505. * interleaved format which could be obtained by riscv_nn_fc_s8_wt_converter.
  14506. * - The outputs will be two-stage shifted before being stored, i.e.,
  14507. * out = ((out >> pre_rshift) *out_scale) >> post_rshift.
  14508. */
  14509. static inline int32_t hpm_nn_fc_u8_s16_s8_sym_fast(const u8_t *in_vec,
  14510. const q7_t *wt_mat,
  14511. const uint16_t size,
  14512. const uint16_t wt_row_num,
  14513. const uint16_t pre_rshift,
  14514. const uint16_t out_scale,
  14515. const uint16_t post_rshift,
  14516. q15_t *out_vec,
  14517. q15_t *in_tmp_buf)
  14518. {
  14519. #if defined(__zcc__)
  14520. return tpt_nn_fc_u8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14521. pre_rshift, out_scale, post_rshift,
  14522. out_vec, in_tmp_buf);
  14523. #else
  14524. return riscv_nn_fc_u8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
  14525. pre_rshift, out_scale, post_rshift,
  14526. out_vec, in_tmp_buf);
  14527. #endif
  14528. }
  14529. /**
  14530. * @brief This is a weight converter for those fully-connected
  14531. * functions with signed 8-bit weight data and named with
  14532. * "fast".
  14533. * @param[in] wt_mat pointer of the weight matrix
  14534. * @param[in] size number of elements in the input vector
  14535. * @param[in] wt_row_num number of rows in the weight matrix
  14536. * @param[out] wt_mat_out pointer of the weight matrix stored in
  14537. * specific ordering
  14538. */
  14539. static inline void hpm_nn_fc_s8_wt_converter(const q7_t *wt_mat,
  14540. const uint32_t size,
  14541. const uint32_t wt_row_num,
  14542. q7_t *wt_mat_out)
  14543. {
  14544. #if defined(__zcc__)
  14545. tpt_nn_fc_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
  14546. #else
  14547. riscv_nn_fc_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
  14548. #endif
  14549. }
  14550. /**
  14551. * @brief This is a weight converter for those fully-connected
  14552. * functions with signed 16-bit weight data and named with
  14553. * "fast".
  14554. * @param[in] wt_mat pointer of the weight matrix
  14555. * @param[in] size number of elements in the input vector
  14556. * @param[in] wt_row_num number of rows in the weight matrix
  14557. * @param[out] wt_mat_out pointer of the weight matrix stored in
  14558. * specific ordering
  14559. */
  14560. static inline void hpm_nn_fc_s16_wt_converter(const q15_t *wt_mat,
  14561. const uint32_t size,
  14562. const uint32_t wt_row_num,
  14563. q15_t *wt_mat_out)
  14564. {
  14565. #if defined(__zcc__)
  14566. tpt_nn_fc_s16_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
  14567. #else
  14568. riscv_nn_fc_s16_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
  14569. #endif
  14570. }
  14571. /**
  14572. * @brief This is a weight converter for
  14573. * riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast.
  14574. * @param[in] wt_mat pointer of the weight matrix
  14575. * @param[in] size number of elements in the input vector
  14576. * @param[in] wt_row_num number of rows in the weight matrix
  14577. * @param[out] wt_mat_out pointer of the weight matrix stored in
  14578. * specific ordering
  14579. */
  14580. static inline void hpm_nn_fc_mat_vec_s8_wt_converter(const q7_t *wt_mat,
  14581. const uint32_t size,
  14582. const uint32_t wt_row_num,
  14583. q7_t *wt_mat_out)
  14584. {
  14585. #if defined(__zcc__)
  14586. tpt_nn_fc_mat_vec_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
  14587. #else
  14588. riscv_nn_fc_mat_vec_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
  14589. #endif
  14590. }
  14591. /**
  14592. * @brief This is a fully connected layer function for signed 8-bit
  14593. * integer inputs with bias inputs and asymmetric quantization
  14594. * on the outputs.
  14595. * @param[in] in_vec pointer of the input vector
  14596. * @param[in] wt_mat pointer of the transposed weight matrix
  14597. * @param[in] in_vec_col number of columns in the input vector (or
  14598. * transposed weight matrix)
  14599. * @param[in] wt_mat_row number of rows in the transposed weight
  14600. * matrix
  14601. * @param[in] in_vec_group number of input vector groups
  14602. * @param[in] in_offset value of offset to be added to the input
  14603. * tensor. It should be in the range of -127 to
  14604. * 128.
  14605. * @param[in] wt_offset value of offset to be added to the weight.
  14606. * It should be in the range of -127 to 128.
  14607. * @param[in] out_scale value of scaling for the output tensor
  14608. * @param[in] out_shift shift amount for the output tensor
  14609. * @param[in] out_offset value of offset to be added to the output
  14610. * tensor. It should be in the range of -128 to
  14611. * 127.
  14612. * @param[in] bias pointer of the bias vector
  14613. * @param[out] out_vec pointer of the output vector
  14614. * @param[in] act_min minimum value to clip out the output tensor.
  14615. * It should be in the range of -128 to 127.
  14616. * @param[in] act_max maximum value to clip out the output tensor.
  14617. * It should be in the range of -128 to 127.
  14618. * @param[in] tmp_buf dummy
  14619. * @return The value returned by the selected backend call, passed through unchanged (documented as always 0).
  14620. */
  14621. static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec,
  14622. const int8_t *wt_mat,
  14623. const uint16_t in_vec_col,
  14624. const uint16_t wt_mat_row,
  14625. const uint16_t in_vec_group,
  14626. const int32_t in_offset,
  14627. const int32_t wt_offset,
  14628. const int32_t out_scale,
  14629. const int32_t out_shift,
  14630. const int32_t out_offset,
  14631. const int32_t *bias,
  14632. int8_t *out_vec,
  14633. const int32_t act_min,
  14634. const int32_t act_max,
  14635. q15_t *tmp_buf)
  14636. {
  14637. #if defined(__zcc__) /* __zcc__ builds: repack the scalars into the tpt_ params/dims structs (positional initializers) */
  14638. tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
  14639. out_shift, act_min, act_max};
  14640. tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row}; /* NOTE(review): field order of the tpt_ structs is not visible here - confirm against their declarations */
  14641. return tpt_fully_connected_s8(out_vec, in_vec, wt_mat, bias, &aFc_params,
  14642. &aFC_dims, tmp_buf);
  14643. #else /* !__zcc__: forward the arguments unchanged to the riscv_nn_ backend */
  14644. return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec, wt_mat, in_vec_col, wt_mat_row,
  14645. in_vec_group, in_offset, wt_offset,
  14646. out_scale, out_shift, out_offset, bias,
  14647. out_vec, act_min, act_max, tmp_buf);
  14648. #endif /* __zcc__ */
  14649. }
  14650. /**
  14651. * @brief This function is used to get the needed size, in bytes, by
  14652. * the temporary buffer (tmp_buf) of hpm_nn_fc_s8_s8_s8_asym_bias.
  14653. * @param[in] in_vec_col number of columns in the input vector (or
  14654. * transposed weight matrix)
  14655. * @return This function returns the needed size by the temporary buffer, as reported by the selected backend.
  14656. */
  14657. static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(const uint16_t in_vec_col)
  14658. {
  14659. #if defined(__zcc__)
  14660. return tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
  14661. #else /* !__zcc__ */
  14662. return riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
  14663. #endif /* __zcc__ */
  14664. }
  14665. /**
  14666. * * @}
  14667. */
  14668. #endif
  14669. #ifdef HPM_EN_MATH_NN_RVP32_LIB
  14670. #if defined(__zcc__)
  14671. #include "tpt_nn_fully_connected.h"
  14672. #else
  14673. #include "riscv_nn_fully_connected.h"
  14674. #endif
  14675. /**
  14676. * @brief This is a fully connected layer function for signed 8-bit
  14677. * integer inputs with bias inputs and asymmetric quantization
  14678. * on the outputs.
  14679. * @param[in] in_vec pointer of the input vector
  14680. * @param[in] wt_mat pointer of the transposed weight matrix
  14681. * @param[in] in_vec_col number of columns in the input vector (or
  14682. * transposed weight matrix)
  14683. * @param[in] wt_mat_row number of rows in the transposed weight
  14684. * matrix
  14685. * @param[in] in_vec_group number of input vector groups
  14686. * @param[in] in_offset value of offset to be added to the input
  14687. * tensor. It should be in the range of -127 to
  14688. * 128.
  14689. * @param[in] wt_offset value of offset to be added to the weight.
  14690. * It should be in the range of -127 to 128.
  14691. * @param[in] out_scale value of scaling for the output tensor
  14692. * @param[in] out_shift shift amount for the output tensor
  14693. * @param[in] out_offset value of offset to be added to the output
  14694. * tensor. It should be in the range of -128 to
  14695. * 127.
  14696. * @param[in] bias pointer of the bias vector
  14697. * @param[out] out_vec pointer of the output vector
  14698. * @param[in] act_min minimum value to clip out the output tensor.
  14699. * It should be in the range of -128 to 127.
  14700. * @param[in] act_max maximum value to clip out the output tensor.
  14701. * It should be in the range of -128 to 127.
  14702. * @param[in] tmp_buf dummy
  14703. * @return The value returned by the selected backend call, passed through unchanged (documented as always 0).
  14704. */
  14705. static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec,
  14706. const int8_t *wt_mat,
  14707. const uint16_t in_vec_col,
  14708. const uint16_t wt_mat_row,
  14709. const uint16_t in_vec_group,
  14710. const int32_t in_offset,
  14711. const int32_t wt_offset,
  14712. const int32_t out_scale,
  14713. const int32_t out_shift,
  14714. const int32_t out_offset,
  14715. const int32_t *bias,
  14716. int8_t *out_vec,
  14717. const int32_t act_min,
  14718. const int32_t act_max,
  14719. q15_t *tmp_buf)
  14720. {
  14721. #if defined(__zcc__) /* __zcc__ builds: repack the scalars into the tpt_ params/dims structs (positional initializers) */
  14722. tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
  14723. out_shift, act_min, act_max};
  14724. tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row}; /* NOTE(review): field order of the tpt_ structs is not visible here - confirm against their declarations */
  14725. return tpt_fully_connected_s8(out_vec, in_vec, wt_mat, bias, &aFc_params,
  14726. &aFC_dims, tmp_buf);
  14727. #else /* !__zcc__: forward the arguments unchanged to the riscv_nn_ backend */
  14728. return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec, wt_mat, in_vec_col, wt_mat_row,
  14729. in_vec_group, in_offset, wt_offset,
  14730. out_scale, out_shift, out_offset, bias,
  14731. out_vec, act_min, act_max, tmp_buf);
  14732. #endif /* __zcc__ */
  14733. }
  14734. /**
  14735. * @brief This function is used to get the needed size, in bytes, by
  14736. * the temporary buffer (tmp_buf) of hpm_nn_fc_s8_s8_s8_asym_bias.
  14737. * @param[in] in_vec_col number of columns in the input vector (or
  14738. * transposed weight matrix)
  14739. * @return This function returns the needed size by the temporary buffer, as reported by the selected backend.
  14740. */
  14741. static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(const uint16_t in_vec_col)
  14742. {
  14743. #if defined(__zcc__)
  14744. return tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
  14745. #else /* !__zcc__ */
  14746. return riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
  14747. #endif /* __zcc__ */
  14748. }
  14749. #endif /* HPM_EN_MATH_NN_RVP32_LIB */
  14750. #endif
  14751. #ifdef HPM_MATH_NN_POOLING
  14752. #ifdef HPM_EN_MATH_NN_LIB
  14753. #if defined(__zcc__)
  14754. #include "tpt_nn_pooling.h"
  14755. #else
  14756. #include "riscv_nn_pooling.h"
  14757. #endif
  14758. /**
  14759. * @defgroup nnpooling NN Pooling Functions
  14760. * @ingroup hpmmath
  14761. * @brief The pooling functions are used to downsample input data. They include
  14762. * max and average pooling functions.
  14763. *
  14764. * @{
  14765. */
  14766. /**
  14767. * @brief This is an average pooling function for signed 8-bit integer
  14768. * inputs.
  14769. * @param[in] in_tensor pointer of the input tensor
  14770. * @param[in] in_tensor_dim dimension of the input tensor
  14771. * @param[in] in_tensor_ch number of input tensor channels
  14772. * @param[in] ker_dim dimension of the filter kernel
  14773. * @param[in] pad padding size
  14774. * @param[in] stride convolution stride
  14775. * @param[in] out_tensor_dim dimension of the output tensor
  14776. * @param[in] in_tmp_buf temporary buffer for the input tensor. It is
  14777. * required when -mext-dsp is enabled and its size
  14778. * must be equal to "2 * out_tensor_dim *
  14779. * in_tensor_ch".
  14780. * @param[out] out_tensor pointer of the output tensor
  14781. *
  14782. * @b Example:
  14783. * @code
  14784. * #define IN_DIM 32
  14785. * #define IN_CH 32
  14786. * #define KER_DIM 3
  14787. * #define PAD 0
  14788. * #define STRIDE 2
  14789. * #define OUT_DIM 15
  14790. *
  14791. * q7_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
  14792. * q7_t out_data[IN_CH * OUT_DIM * OUT_DIM] = {...};
  14793. * q7_t in_tmp_buf[2 * OUT_DIM * IN_CH];
  14794. *
  14795. * hpm_nn_avepool_HWC_s8(in_data, IN_DIM, IN_CH, KER_DIM, PAD, STRIDE,
  14796. * OUT_DIM, in_tmp_buf, out_data);
  14797. * @endcode
  14798. */
  14799. static inline void hpm_nn_avepool_HWC_s8(q7_t *in_tensor,
  14800. const uint16_t in_tensor_dim,
  14801. const uint16_t in_tensor_ch,
  14802. const uint16_t ker_dim,
  14803. const uint16_t pad,
  14804. const uint16_t stride,
  14805. const uint16_t out_tensor_dim,
  14806. q7_t *in_tmp_buf,
  14807. q7_t *out_tensor)
  14808. {
  14809. #if defined(__zcc__)
  14810. tpt_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
  14811. stride, out_tensor_dim, in_tmp_buf, out_tensor);
  14812. #else /* !__zcc__ */
  14813. riscv_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
  14814. stride, out_tensor_dim, in_tmp_buf, out_tensor);
  14815. #endif /* __zcc__ */
  14816. }
  14817. /**
  14818. * @brief This is an average pooling function for signed 8-bit integer
  14819. * inputs in any x and y dimensions.
  14820. * @param[in] in_tensor pointer of the input tensor
  14821. * @param[in] in_tensor_dim_x x dimension of the input tensor
  14822. * @param[in] in_tensor_dim_y y dimension of the input tensor
  14823. * @param[in] in_tensor_ch number of input tensor channels
  14824. * @param[in] ker_dim_x x dimension of the filter kernel
  14825. * @param[in] ker_dim_y y dimension of the filter kernel
  14826. * @param[in] pad_x padding size in the x dimension
  14827. * @param[in] pad_y padding size in the y dimension
  14828. * @param[in] stride_x convolution stride in the x dimension
  14829. * @param[in] stride_y convolution stride in the y dimension
  14830. * @param[in] out_tensor_dim_x x dimension of the output tensor
  14831. * @param[in] out_tensor_dim_y y dimension of the output tensor
  14832. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  14833. * It is required when -mext-dsp is enabled
  14834. * and its size must be equal to "2 *
  14835. * out_tensor_dim_x * in_tensor_ch".
  14836. * @param[out] out_tensor pointer of the output tensor
  14837. * @param[in] out_lshift left shift amount for the output
  14838. *
  14839. * @b Example:
  14840. * @code
  14841. * #define IN_X 160
  14842. * #define IN_Y 120
  14843. * #define IN_CH 3
  14844. * #define KER_DIM_X 3
  14845. * #define KER_DIM_Y 5
  14846. * #define PAD_X 1
  14847. * #define PAD_Y 1
  14848. * #define STRIDE_X 2
  14849. * #define STRIDE_Y 2
  14850. * #define OUT_LSHIFT 3
  14851. * #define OUT_X 80
  14852. * #define OUT_Y 59
  14853. *
  14854. * q7_t in_data[IN_CH * IN_X * IN_Y] = {...};
  14855. * q7_t out_data[IN_CH * OUT_X * OUT_Y] = {...};
  14856. * q7_t in_tmp_buf[2 * IN_CH * OUT_X * OUT_Y];
  14857. *
  14858. * hpm_nn_avepool_HWC_s8_any(in_data, IN_X, IN_Y, IN_CH, KER_DIM_X, KER_DIM_Y,
  14859. * PAD_X, PAD_Y, STRIDE_X, STRIDE_Y, OUT_X, OUT_Y, in_tmp_buf, out_data,
  14860. * OUT_LSHIFT);
  14861. * @endcode
  14862. */
  14863. static inline void hpm_nn_avepool_HWC_s8_any(q7_t *in_tensor,
  14864. const uint16_t in_tensor_dim_x,
  14865. const uint16_t in_tensor_dim_y,
  14866. const uint16_t in_tensor_ch,
  14867. const uint16_t ker_dim_x,
  14868. const uint16_t ker_dim_y,
  14869. const uint16_t pad_x,
  14870. const uint16_t pad_y,
  14871. const uint16_t stride_x,
  14872. const uint16_t stride_y,
  14873. const uint16_t out_tensor_dim_x,
  14874. const uint16_t out_tensor_dim_y,
  14875. q7_t *in_tmp_buf,
  14876. q7_t *out_tensor,
  14877. const uint16_t out_lshift)
  14878. {
  14879. #if defined(__zcc__) /* both backends take the arguments in the same order */
  14880. tpt_nn_avepool_HWC_s8_any(
  14881. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
  14882. ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
  14883. out_tensor_dim_y, in_tmp_buf, out_tensor, out_lshift);
  14884. #else /* !__zcc__ */
  14885. riscv_nn_avepool_HWC_s8_any(
  14886. in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
  14887. ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
  14888. out_tensor_dim_y, in_tmp_buf, out_tensor, out_lshift);
  14889. #endif /* __zcc__ */
  14890. }
  14891. /**
  14892. * @brief This is an average pooling function for S8 inputs with any x
  14893. * and y dimension with the activating parameters to limit the
  14894. * outputs.
  14895. * @param[in] in_tensor_dim_y y dimension of the input tensor
  14896. * @param[in] in_tensor_dim_x x dimension of the input tensor
  14897. * @param[in] out_tensor_dim_y y dimension of the output tensor
  14898. * @param[in] out_tensor_dim_x x dimension of the output tensor
  14899. * @param[in] stride_y convolution stride in the y dimension
  14900. * @param[in] stride_x convolution stride in the x dimension
  14901. * @param[in] ker_dim_y y dimension of the filter kernel
  14902. * @param[in] ker_dim_x x dimension of the filter kernel
  14903. * @param[in] pad_y padding size in the y dimension
  14904. * @param[in] pad_x padding size in the x dimension
  14905. * @param[in] act_min minimum value that the output tensor is
  14906. * limited to. It should be in the range of
  14907. * -128 to 127.
  14908. * @param[in] act_max maximum value that the output tensor is
  14909. * limited to. It should be in the range of
  14910. * -128 to 127.
  14911. * @param[in] in_tensor_ch number of input tensor channels
  14912. * @param[in] in_tensor pointer of the input tensor
  14913. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  14914. * Its needed size could be obtained by
  14915. * calling hpm_nn_avepool_HWC_s8_any_act_get_buffer_size.
  14916. * @param[out] out_tensor pointer of the output tensor
  14917. * @return The value returned by the selected backend call, passed through unchanged (documented as always 0).
  14918. */
  14919. static inline int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y,
  14920. const int in_tensor_dim_x,
  14921. const int out_tensor_dim_y,
  14922. const int out_tensor_dim_x,
  14923. const int stride_y,
  14924. const int stride_x,
  14925. const int ker_dim_y,
  14926. const int ker_dim_x,
  14927. const int pad_y,
  14928. const int pad_x,
  14929. const int act_min,
  14930. const int act_max,
  14931. const int in_tensor_ch,
  14932. int8_t *in_tensor,
  14933. int16_t *in_tmp_buf,
  14934. int8_t *out_tensor)
  14935. {
  14936. #if defined(__zcc__) /* __zcc__ builds: repack the scalars into the tpt_ pooling params/dims structs (positional initializers) */
  14937. tpt_nn_avgpool_params_act_s8 aPool_params = {stride_x, stride_y, pad_x, pad_y,
  14938. act_min, act_max};
  14939. tpt_nn_avgpool_dims_act_s8 aPool_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  14940. ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y}; /* NOTE(review): field order of the tpt_ structs is not visible here - confirm against their declarations */
  14941. return tpt_avgpool_s8_any_act(out_tensor, in_tensor, &aPool_params, &aPool_dims, in_tmp_buf);
  14942. #else /* !__zcc__: forward the arguments unchanged to the riscv_nn_ backend */
  14943. return riscv_nn_avepool_HWC_s8_any_act(
  14944. in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
  14945. stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
  14946. in_tensor_ch, in_tensor, in_tmp_buf, out_tensor);
  14947. #endif /* __zcc__ */
  14948. }
  14949. /**
  14950. * @brief This function is used to obtain the required size, in bytes,
  14951. * for the input temporary buffer of hpm_nn_avepool_HWC_s8_any_act.
  14952. * @param[in] out_tensor_dim_x x dimension of the output tensor
  14953. * @param[in] in_tensor_ch number of input tensor channels
  14954. * @return This function returns the size required by the temporary
  14955. * buffer, as reported by the selected backend.
  14956. */
  14957. static inline int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
  14958. {
  14959. #if defined(__zcc__)
  14960. return tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
  14961. in_tensor_ch);
  14962. #else /* !__zcc__ */
  14963. return riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
  14964. in_tensor_ch);
  14965. #endif /* __zcc__ */
  14966. }
  14967. /**
  14968. * @brief This is a max pooling function for signed 8-bit integer
  14969. * inputs.
  14970. * @param[in] in_tensor pointer of the input tensor
  14971. * @param[in] in_tensor_dim dimension of the input tensor
  14972. * @param[in] in_tensor_ch number of input tensor channels
  14973. * @param[in] ker_dim dimension of the filter kernel
  14974. * @param[in] pad padding size
  14975. * @param[in] stride convolution stride
  14976. * @param[in] out_tensor_dim dimension of the output tensor
  14977. * @param[in] in_tmp_buf dummy (the example below passes NULL)
  14978. * @param[out] out_tensor pointer of the output tensor
  14979. *
  14980. * @b Example:
  14981. * @code
  14982. * #define IN_DIM 32
  14983. * #define IN_CH 32
  14984. * #define KER_DIM 3
  14985. * #define PAD 0
  14986. * #define STRIDE 2
  14987. * #define OUT_DIM 15
  14988. *
  14989. * q7_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
  14990. * q7_t out_data[IN_CH * OUT_DIM * OUT_DIM] = {...};
  14991. *
  14992. * hpm_nn_maxpool_HWC_s8(in_data, IN_DIM, IN_CH, KER_DIM, PAD, STRIDE,
  14993. * OUT_DIM, NULL, out_data);
  14994. * @endcode
  14995. */
  14996. static inline void hpm_nn_maxpool_HWC_s8(q7_t *in_tensor,
  14997. const uint16_t in_tensor_dim,
  14998. const uint16_t in_tensor_ch,
  14999. const uint16_t ker_dim,
  15000. const uint16_t pad,
  15001. const uint16_t stride,
  15002. const uint16_t out_tensor_dim,
  15003. q7_t *in_tmp_buf,
  15004. q7_t *out_tensor)
  15005. {
  15006. #if defined(__zcc__)
  15007. tpt_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
  15008. stride, out_tensor_dim, in_tmp_buf, out_tensor);
  15009. #else /* !__zcc__ */
  15010. riscv_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
  15011. stride, out_tensor_dim, in_tmp_buf, out_tensor);
  15012. #endif /* __zcc__ */
  15013. }
  15014. /**
  15015. * @brief This is a max pooling function for signed 8-bit integer
  15016. * inputs in any x and y dimensions with the activating
  15017. * parameters to limit the outputs.
  15018. * @param[in] in_tensor_dim_y y dimension of the input tensor
  15019. * @param[in] in_tensor_dim_x x dimension of the input tensor
  15020. * @param[in] out_tensor_dim_y y dimension of the output tensor
  15021. * @param[in] out_tensor_dim_x x dimension of the output tensor
  15022. * @param[in] stride_y convolution stride in the y dimension
  15023. * @param[in] stride_x convolution stride in the x dimension
  15024. * @param[in] ker_dim_y y dimension of the filter kernel
  15025. * @param[in] ker_dim_x x dimension of the filter kernel
  15026. * @param[in] pad_y padding size in the y dimension
  15027. * @param[in] pad_x padding size in the x dimension
  15028. * @param[in] act_min minimum value that the output tensor is
  15029. * limited to. It should be in the range of
  15030. * -128 to 127.
  15031. * @param[in] act_max maximum value that the output tensor is
  15032. * limited to. It should be in the range of
  15033. * -128 to 127.
  15034. * @param[in] in_tensor_ch number of input tensor channels
  15035. * @param[in] in_tensor pointer of the input tensor
  15036. * @param[in] tmp_buffer dummy
  15037. * @param[out] out_tensor pointer of the output tensor
  15038. * @return The value returned by the selected backend call, passed through unchanged (documented as always 0).
  15039. */
  15040. static inline int32_t hpm_nn_maxpool_HWC_s8_any_act(const uint16_t in_tensor_dim_y,
  15041. const uint16_t in_tensor_dim_x,
  15042. const uint16_t out_tensor_dim_y,
  15043. const uint16_t out_tensor_dim_x,
  15044. const uint16_t stride_y,
  15045. const uint16_t stride_x,
  15046. const uint16_t ker_dim_y,
  15047. const uint16_t ker_dim_x,
  15048. const uint16_t pad_y,
  15049. const uint16_t pad_x,
  15050. const int8_t act_min,
  15051. const int8_t act_max,
  15052. const uint16_t in_tensor_ch,
  15053. int8_t *in_tensor,
  15054. int16_t *tmp_buffer,
  15055. int8_t *out_tensor)
  15056. {
  15057. #if defined(__zcc__) /* both backends take the arguments in the same order */
  15058. return tpt_nn_maxpool_HWC_s8_any_act(
  15059. in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
  15060. stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
  15061. in_tensor_ch, in_tensor, tmp_buffer, out_tensor);
  15062. #else /* !__zcc__ */
  15063. return riscv_nn_maxpool_HWC_s8_any_act(
  15064. in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
  15065. stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
  15066. in_tensor_ch, in_tensor, tmp_buffer, out_tensor);
  15067. #endif /* __zcc__ */
  15068. }
  15069. /**
  15070. * * @}
  15071. */
  15072. #endif
  15073. #ifdef HPM_EN_MATH_NN_RVP32_LIB
  15074. #if defined(__zcc__)
  15075. #include "tpt_nn_pooling.h"
  15076. #else
  15077. #include "riscv_nn_pooling.h"
  15078. #endif
  15079. /**
  15080. * @brief This is an average pooling function for S8 inputs with any x
  15081. * and y dimension with the activating parameters to limit the
  15082. * outputs.
  15083. * @param[in] in_tensor_dim_y y dimension of the input tensor
  15084. * @param[in] in_tensor_dim_x x dimension of the input tensor
  15085. * @param[in] out_tensor_dim_y y dimension of the output tensor
  15086. * @param[in] out_tensor_dim_x x dimension of the output tensor
  15087. * @param[in] stride_y convolution stride in the y dimension
  15088. * @param[in] stride_x convolution stride in the x dimension
  15089. * @param[in] ker_dim_y y dimension of the filter kernel
  15090. * @param[in] ker_dim_x x dimension of the filter kernel
  15091. * @param[in] pad_y padding size in the y dimension
  15092. * @param[in] pad_x padding size in the x dimension
  15093. * @param[in] act_min minimum value that the output tensor is
  15094. * limited to. It should be in the range of
  15095. * -128 to 127.
  15096. * @param[in] act_max maximum value that the output tensor is
  15097. * limited to. It should be in the range of
  15098. * -128 to 127.
  15099. * @param[in] in_tensor_ch number of input tensor channels
  15100. * @param[in] in_tensor pointer of the input tensor
  15101. * @param[in] in_tmp_buf temporary buffer for the input tensor.
  15102. * Its needed size could be obtained by
  15103. * calling hpm_nn_avepool_HWC_s8_any_act_get_buffer_size.
  15104. * @param[out] out_tensor pointer of the output tensor
  15105. * @return The value returned by the selected backend call, passed through unchanged (documented as always 0).
  15106. */
  15107. static inline int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y,
  15108. const int in_tensor_dim_x,
  15109. const int out_tensor_dim_y,
  15110. const int out_tensor_dim_x,
  15111. const int stride_y,
  15112. const int stride_x,
  15113. const int ker_dim_y,
  15114. const int ker_dim_x,
  15115. const int pad_y,
  15116. const int pad_x,
  15117. const int act_min,
  15118. const int act_max,
  15119. const int in_tensor_ch,
  15120. int8_t *in_tensor,
  15121. int16_t *in_tmp_buf,
  15122. int8_t *out_tensor)
  15123. {
  15124. #if defined(__zcc__) /* __zcc__ builds: repack the scalars into the tpt_ pooling params/dims structs (positional initializers) */
  15125. tpt_nn_avgpool_params_act_s8 aPool_params = {stride_x, stride_y, pad_x, pad_y,
  15126. act_min, act_max};
  15127. tpt_nn_avgpool_dims_act_s8 aPool_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
  15128. ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y}; /* NOTE(review): field order of the tpt_ structs is not visible here - confirm against their declarations */
  15129. return tpt_avgpool_s8_any_act(out_tensor, in_tensor, &aPool_params, &aPool_dims, in_tmp_buf);
  15130. #else /* !__zcc__: forward the arguments unchanged to the riscv_nn_ backend */
  15131. return riscv_nn_avepool_HWC_s8_any_act(
  15132. in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
  15133. stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
  15134. in_tensor_ch, in_tensor, in_tmp_buf, out_tensor);
  15135. #endif /* __zcc__ */
  15136. }
  15137. /**
  15138. * @brief This function is used to obtain the required size, in bytes,
  15139. * for the input temporary buffer of hpm_nn_avepool_HWC_s8_any_act.
  15140. * @param[in] out_tensor_dim_x x dimension of the output tensor
  15141. * @param[in] in_tensor_ch number of input tensor channels
  15142. * @return This function returns the size required by the temporary
  15143. * buffer, as reported by the selected backend.
  15144. */
  15145. static inline int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
  15146. {
  15147. #if defined(__zcc__)
  15148. return tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
  15149. in_tensor_ch);
  15150. #else /* !__zcc__ */
  15151. return riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
  15152. in_tensor_ch);
  15153. #endif /* __zcc__ */
  15154. }
  15155. #endif
  15156. #endif
  15157. #ifdef HPM_MATH_NN_SOFTMAX
  15158. #ifdef HPM_EN_MATH_NN_LIB
  15159. #if defined(__zcc__)
  15160. #include "tpt_nn_softmax.h"
  15161. #else
  15162. #include "riscv_nn_softmax.h"
  15163. #endif
  15164. /**
  15165. * @defgroup nnsoftmax NN Softmax Functions
  15166. * @ingroup hpmmath
  15167. * @brief The softmax functions are exponential functions with base 2.
  15168. *
  15169. * @{
  15170. */
  15171. /**
  15172. * @brief This is a softmax function for signed 8-bit integer input
  15173. * vectors. It forwards to the tpt_ or riscv_nn_ backend.
  15174. * @param[in] in_vec pointer of the input vector
  15175. * @param[in] size number of elements in the input vector
  15176. * @param[out] out_vec pointer of the output vector
  15177. *
  15178. * @b Example:
  15179. * @code
  15180. * #define LENGTH 10
  15181. * q7_t in_data[LENGTH] = {...};
  15182. * q7_t out_data[LENGTH];
  15183. *
  15184. * hpm_nn_softmax_s8_fast(in_data, LENGTH, out_data);
  15185. * @endcode
  15186. */
  15187. static inline void hpm_nn_softmax_s8_fast(const q7_t *in_vec,
  15188. const uint16_t size,
  15189. q7_t *out_vec)
  15190. {
  15191. #if defined(__zcc__)
  15192. tpt_nn_softmax_s8_fast(in_vec, size, out_vec);
  15193. #else /* !__zcc__ */
  15194. riscv_nn_softmax_s8_fast(in_vec, size, out_vec);
  15195. #endif /* __zcc__ */
  15196. }
  15197. /**
  15198. * @brief This is a softmax function for signed 16-bit integer input
  15199. * vectors. It forwards to the tpt_ or riscv_nn_ backend.
  15200. * @param[in] in_vec pointer of the input vector
  15201. * @param[in] size number of elements in the input vector
  15202. * @param[out] out_vec pointer of the output vector
  15203. */
  15204. static inline void hpm_nn_softmax_s16_fast(const q15_t *in_vec,
  15205. const uint16_t size,
  15206. q15_t *out_vec)
  15207. {
  15208. #if defined(__zcc__)
  15209. tpt_nn_softmax_s16_fast(in_vec, size, out_vec);
  15210. #else /* !__zcc__ */
  15211. riscv_nn_softmax_s16_fast(in_vec, size, out_vec);
  15212. #endif /* __zcc__ */
  15213. }
  15214. /**
  15215. * @brief This is a softmax function for signed 8-bit integer input
  15216. * tensor with high precision algorithm.
  15217. * @param[in] in_tensor pointer of the input tensor
  15218. * @param[in] in_tensor_row number of rows in the input tensor
  15219. * @param[in] in_tensor_col number of columns in the input tensor
  15220. * @param[in] scale scaling value for input quantization
  15221. * @param[in] lshift left shift amount for input quantization
  15222. * @param[in] diff_min minimum threshold to perform the quantized
  15223. * exponential operation. The difference can be
  15224. * obtained by subtracting the input from the
  15225. * maximum in row.
  15226. * @param[out] out_tensor pointer of the output tensor
  15227. */
  15228. static inline void hpm_nn_softmax_s8_hp(const int8_t *in_tensor,
  15229. const int32_t in_tensor_row,
  15230. const int32_t in_tensor_col,
  15231. const int32_t scale,
  15232. const int32_t lshift,
  15233. const int32_t diff_min,
  15234. int8_t *out_tensor)
  15235. {
  15236. #if defined(__zcc__) /* __zcc__ builds: tpt_softmax_s8_hp is called with out_tensor as its first argument */
  15237. tpt_softmax_s8_hp(out_tensor, in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
  15238. diff_min);
  15239. #else /* !__zcc__: riscv_nn_softmax_s8_hp is called with out_tensor as its last argument */
  15240. riscv_nn_softmax_s8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
  15241. diff_min, out_tensor);
  15242. #endif /* __zcc__ */
  15243. }
  15244. /**
  15245. * @brief This is a softmax function for unsigned 8-bit integer input
  15246. * tensor with high precision algorithm.
  15247. * @param[in] in_tensor pointer of the input tensor
  15248. * @param[in] in_tensor_row number of rows in the input tensor
  15249. * @param[in] in_tensor_col number of columns in the input tensor
  15250. * @param[in] scale scaling value for input quantization
  15251. * @param[in] lshift left shift amount for input quantization
  15252. * @param[in] diff_min minimum threshold to perform the quantized
  15253. * exponential operation. The difference can be
  15254. * obtained by subtracting the input from the
  15255. * maximum in row.
  15256. * @param[out] out_tensor pointer of the output tensor
  15257. */
  15258. static inline void hpm_nn_softmax_u8_hp(const uint8_t *in_tensor,
  15259. const int32_t in_tensor_row,
  15260. const int32_t in_tensor_col,
  15261. const int32_t scale,
  15262. const int32_t lshift,
  15263. const int32_t diff_min,
  15264. uint8_t *out_tensor)
  15265. {
  15266. #if defined(__zcc__) /* both backends take the arguments in the same order */
  15267. tpt_nn_softmax_u8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
  15268. diff_min, out_tensor);
  15269. #else /* !__zcc__ */
  15270. riscv_nn_softmax_u8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
  15271. diff_min, out_tensor);
  15272. #endif /* __zcc__ */
  15273. }
  15274. /**
  15275. * * @}
  15276. */
  15277. #endif
  15278. #ifdef HPM_EN_MATH_NN_RVP32_LIB
  15279. #if defined(__zcc__)
  15280. #include "tpt_nn_softmax.h"
  15281. #else
  15282. #include "riscv_nn_softmax.h"
  15283. #endif
  15284. /**
  15285. * @brief This is a softmax function for signed 8-bit integer input
  15286. * tensor with high precision algorithm.
  15287. * @param[in] in_tensor pointer of the input tensor
  15288. * @param[in] in_tensor_row number of rows in the input tensor
  15289. * @param[in] in_tensor_col number of columns in the input tensor
  15290. * @param[in] scale scaling value for input quantization
  15291. * @param[in] lshift left shift amount for input quantization
  15292. * @param[in] diff_min minimum threshold to perform the quantized
  15293. * exponential operation. The difference can be
  15294. * obtained by subtracting the input from the
  15295. * maximum in row.
  15296. * @param[out] out_tensor pointer of the output tensor
  15297. */
  15298. static inline void hpm_nn_softmax_s8_hp(const int8_t *in_tensor,
  15299. const int32_t in_tensor_row,
  15300. const int32_t in_tensor_col,
  15301. const int32_t scale,
  15302. const int32_t lshift,
  15303. const int32_t diff_min,
  15304. int8_t *out_tensor)
  15305. {
  15306. #if defined(__zcc__) /* __zcc__ builds: tpt_softmax_s8_hp is called with out_tensor as its first argument */
  15307. tpt_softmax_s8_hp(out_tensor, in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
  15308. diff_min);
  15309. #else /* !__zcc__: riscv_nn_softmax_s8_hp is called with out_tensor as its last argument */
  15310. riscv_nn_softmax_s8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
  15311. diff_min, out_tensor);
  15312. #endif /* __zcc__ */
  15313. }
  15314. #endif
  15315. #endif
  15316. #ifdef HPM_MATH_NN_UTIL
  15317. #ifdef HPM_EN_MATH_NN_LIB
  15318. #if defined(__zcc__)
  15319. #include "tpt_nn_util.h"
  15320. #else
  15321. #include "riscv_nn_util.h"
  15322. #endif
  15323. /**
  15324. * @defgroup nnutils NN Utils Functions
  15325. * @ingroup hpmmath
  15326. * @brief Utils functions are miscellaneous auxiliary tools.
  15327. *
  15328. * @{
  15329. */
  15330. #ifdef __riscv_zfh /* hpm_nn_exp_f16 is only compiled when __riscv_zfh is defined */
  15331. /**
  15332. * @brief This function calculates the base-e exponential values of
  15333. * 16-bit half-precision floating point inputs.
  15334. * @param[in] in_vec pointer of the input vector
  15335. * @param[in] size number of elements in the input vector
  15336. * @param[out] out_vec pointer of the output vector
  15337. * @return The value returned by the selected backend call, passed through unchanged (documented as always 0).
  15338. */
  15339. static inline int32_t hpm_nn_exp_f16(const float16_t *in_vec,
  15340. const uint32_t size,
  15341. float16_t *out_vec)
  15342. {
  15343. #if defined(__zcc__)
  15344. return tpt_nn_exp_f16(in_vec, size, out_vec);
  15345. #else /* !__zcc__ */
  15346. return riscv_nn_exp_f16(in_vec, size, out_vec);
  15347. #endif /* __zcc__ */
  15348. }
  15349. #endif /* __riscv_zfh */
  15350. /**
  15351. * @brief This function turns the input tensor into another tensor
  15352. * with the same data but in a different shape.
  15353. * @param[in] in_tensor pointer of the input tensor
  15354. * @param[out] out_tensor pointer of the output tensor
  15355. * @param[in] size size, in bytes, of total input tensor
  15356. *
  15357. * @b Example:
  15358. * @code
  15359. * #define SIZE 1024
  15360. * int8_t in_tensor[SIZE] = {...};
  15361. * int8_t out_tensor[SIZE];
  15362. *
  15363. * hpm_nn_reshape_s8(in_tensor, out_tensor, SIZE);
  15364. * @endcode
  15365. */
  15366. static inline void hpm_nn_reshape_s8(const int8_t *in_tensor,
  15367. int8_t *out_tensor,
  15368. const uint32_t size)
  15369. {
  15370. #if defined(__zcc__) /* __zcc__ builds: tpt_reshape_s8 is called with the output pointer first */
  15371. tpt_reshape_s8(out_tensor, in_tensor, size);
  15372. #else /* !__zcc__: riscv_nn_reshape_s8 is called with the input pointer first */
  15373. riscv_nn_reshape_s8(in_tensor, out_tensor, size);
  15374. #endif /* __zcc__ */
  15375. }
  15376. /**
  15377. * @brief This function finds the k largest values and their indices
  15378. * from the signed 8-bit integer input vector.
  15379. * @param[in] in_vec pointer of the input vector
  15380. * @param[in] size number of elements in the input vector
  15381. * @param[in] k the number of the largest values to be
  15382. * searched
  15383. * @param[out] val the k largest values in the input vector
  15384. * @param[out] idx the indices of the k largest values in the
  15385. * input vector
  15386. * @return The value returned by the selected backend call, passed through unchanged (documented as always 0).
  15387. *
  15388. * @note
  15389. * - If several elements have the same value, the element with the
  15390. * smaller index will be selected with high priority.
  15391. * - The k largest values will be sorted from largest to smallest and stored in
  15392. * "val" output vector. If several elements have the same value,
  15393. * the elements will be sorted from smallest index to largest index.
  15394. */
  15395. static inline int32_t hpm_nn_top_k_s8(q7_t *in_vec,
  15396. uint32_t size,
  15397. uint32_t k,
  15398. q7_t *val,
  15399. uint32_t *idx)
  15400. {
  15401. #if defined(__zcc__)
  15402. return tpt_nn_top_k_s8(in_vec, size, k, val, idx);
  15403. #else /* !__zcc__ */
  15404. return riscv_nn_top_k_s8(in_vec, size, k, val, idx);
  15405. #endif /* __zcc__ */
  15406. }
  #ifdef __riscv_zfh
  /**
   * @brief Find the k largest values, and their indices, in a 16-bit
   *        half-precision floating point input vector.
   *
   * This wrapper is only compiled when __riscv_zfh is defined. The call is
   * forwarded to tpt_nn_top_k_f16() when __zcc__ is defined and to
   * riscv_nn_top_k_f16() otherwise.
   *
   * @param[in]  in_vec  pointer to the input vector
   * @param[in]  size    number of elements in the input vector
   * @param[in]  k       how many of the largest values to search for
   * @param[out] val     the k largest values found in the input vector
   * @param[out] idx     the indices of those k largest values in the input
   *                     vector
   * @return The value returned by the selected backend; documented as
   *         always 0.
   *
   * @note
   * - When several elements share the same value, the element with the
   *   smaller index is selected first.
   * - The k largest values are stored in "val" sorted from largest to
   *   smallest. Elements with equal values are ordered from smallest index
   *   to largest index.
   */
  static inline int32_t hpm_nn_top_k_f16(float16_t *in_vec,
                                         uint32_t size,
                                         uint32_t k,
                                         float16_t *val,
                                         uint32_t *idx)
  {
      int32_t status;

  #if !defined(__zcc__)
      status = riscv_nn_top_k_f16(in_vec, size, k, val, idx);
  #else
      status = tpt_nn_top_k_f16(in_vec, size, k, val, idx);
  #endif

      return status;
  }
  #endif /* __riscv_zfh */
  /**
   * @}
   */
  15443. #endif
  15444. #ifdef HPM_EN_MATH_NN_RVP32_LIB
  15445. #if defined(__zcc__)
  15446. #include "tpt_nn_util.h"
  15447. #else
  15448. #include "riscv_nn_util.h"
  15449. #endif
  /**
   * @brief Produce an output tensor holding the same data as the input
   *        tensor, to be interpreted with a different shape.
   *
   * The call is forwarded to tpt_reshape_s8() when __zcc__ is defined and to
   * riscv_nn_reshape_s8() otherwise. The two backend calls take the tensor
   * pointers in opposite order: tpt_reshape_s8() is given out_tensor first.
   * NOTE(review): confirm this order against the tpt_reshape_s8() prototype.
   *
   * @param[in]  in_tensor   pointer to the input tensor
   * @param[out] out_tensor  pointer to the output tensor
   * @param[in]  size        total size of the input tensor, in bytes
   *
   * @b Example:
   * @code
   * #define SIZE 1024
   * int8_t in_tensor[SIZE] = {...};
   * int8_t out_tensor[SIZE];
   *
   * hpm_nn_reshape_s8(in_tensor, out_tensor, SIZE);
   * @endcode
   */
  static inline void hpm_nn_reshape_s8(const int8_t *in_tensor,
                                       int8_t *out_tensor,
                                       const uint32_t size)
  {
  #if !defined(__zcc__)
      riscv_nn_reshape_s8(in_tensor, out_tensor, size);
  #else
      /* Output pointer is passed first for this backend. */
      tpt_reshape_s8(out_tensor, in_tensor, size);
  #endif
  }
  15476. #endif
  15477. /**
  15478. * @}
  15479. */
  15480. #endif
  15481. #ifdef __cplusplus
  15482. }
  15483. #endif
  15484. #endif